Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
195k
                                   const size_t output_stride, float* scratch) {
40
195k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
195k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
195k
  float* block = scratch;
43
195k
  if (ROWS < COLS) {
44
153k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
313k
      for (size_t x = 0; x < LF_COLS; x++) {
46
229k
        block[y * COLS + x] = input[y * input_stride + x] *
47
229k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
229k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
229k
      }
50
84.1k
    }
51
126k
  } else {
52
394k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.17M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
904k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
904k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
904k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
904k
      }
58
268k
    }
59
126k
  }
60
61
195k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
195k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
195k
                                  scratch_space);
64
195k
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
24.7k
                                   const size_t output_stride, float* scratch) {
40
24.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
24.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
24.7k
  float* block = scratch;
43
24.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
24.7k
  } else {
52
49.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
74.2k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
49.4k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
49.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
49.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
49.4k
      }
58
24.7k
    }
59
24.7k
  }
60
61
24.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
24.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
24.7k
                                  scratch_space);
64
24.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
27.7k
                                   const size_t output_stride, float* scratch) {
40
27.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
27.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
27.7k
  float* block = scratch;
43
27.7k
  if (ROWS < COLS) {
44
55.4k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
83.2k
      for (size_t x = 0; x < LF_COLS; x++) {
46
55.4k
        block[y * COLS + x] = input[y * input_stride + x] *
47
55.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
55.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
55.4k
      }
50
27.7k
    }
51
27.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
27.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
27.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
27.7k
                                  scratch_space);
64
27.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.1k
                                   const size_t output_stride, float* scratch) {
40
19.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.1k
  float* block = scratch;
43
19.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.1k
  } else {
52
57.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
114k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
76.6k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
76.6k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
76.6k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
76.6k
      }
58
38.3k
    }
59
19.1k
  }
60
61
19.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.1k
                                  scratch_space);
64
19.1k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.57k
                                   const size_t output_stride, float* scratch) {
40
5.57k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.57k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.57k
  float* block = scratch;
43
5.57k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
5.57k
  } else {
52
16.7k
    for (size_t y = 0; y < LF_COLS; y++) {
53
55.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
44.5k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
44.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
44.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
44.5k
      }
58
11.1k
    }
59
5.57k
  }
60
61
5.57k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.57k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.57k
                                  scratch_space);
64
5.57k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.93k
                                   const size_t output_stride, float* scratch) {
40
6.93k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.93k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.93k
  float* block = scratch;
43
6.93k
  if (ROWS < COLS) {
44
20.8k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
69.3k
      for (size_t x = 0; x < LF_COLS; x++) {
46
55.4k
        block[y * COLS + x] = input[y * input_stride + x] *
47
55.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
55.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
55.4k
      }
50
13.8k
    }
51
6.93k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.93k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.93k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.93k
                                  scratch_space);
64
6.93k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.3k
                                   const size_t output_stride, float* scratch) {
40
12.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.3k
  float* block = scratch;
43
12.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.3k
  } else {
52
61.6k
    for (size_t y = 0; y < LF_COLS; y++) {
53
246k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
197k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
197k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
197k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
197k
      }
58
49.3k
    }
59
12.3k
  }
60
61
12.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.3k
                                  scratch_space);
64
12.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
162
                                   const size_t output_stride, float* scratch) {
40
162
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
162
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
162
  float* block = scratch;
43
162
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
162
  } else {
52
810
    for (size_t y = 0; y < LF_COLS; y++) {
53
5.83k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
5.18k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
5.18k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
5.18k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
5.18k
      }
58
648
    }
59
162
  }
60
61
162
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
162
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
162
                                  scratch_space);
64
162
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
123
                                   const size_t output_stride, float* scratch) {
40
123
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
123
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
123
  float* block = scratch;
43
123
  if (ROWS < COLS) {
44
615
    for (size_t y = 0; y < LF_ROWS; y++) {
45
4.42k
      for (size_t x = 0; x < LF_COLS; x++) {
46
3.93k
        block[y * COLS + x] = input[y * input_stride + x] *
47
3.93k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
3.93k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
3.93k
      }
50
492
    }
51
123
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
123
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
123
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
123
                                  scratch_space);
64
123
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
1.23k
                                   const size_t output_stride, float* scratch) {
40
1.23k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.23k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.23k
  float* block = scratch;
43
1.23k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
1.23k
  } else {
52
11.0k
    for (size_t y = 0; y < LF_COLS; y++) {
53
88.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
78.9k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
78.9k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
78.9k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
78.9k
      }
58
9.86k
    }
59
1.23k
  }
60
61
1.23k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.23k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.23k
                                  scratch_space);
64
1.23k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
24.7k
                                   const size_t output_stride, float* scratch) {
40
24.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
24.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
24.7k
  float* block = scratch;
43
24.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
24.7k
  } else {
52
49.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
74.2k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
49.4k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
49.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
49.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
49.4k
      }
58
24.7k
    }
59
24.7k
  }
60
61
24.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
24.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
24.7k
                                  scratch_space);
64
24.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
27.7k
                                   const size_t output_stride, float* scratch) {
40
27.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
27.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
27.7k
  float* block = scratch;
43
27.7k
  if (ROWS < COLS) {
44
55.4k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
83.2k
      for (size_t x = 0; x < LF_COLS; x++) {
46
55.4k
        block[y * COLS + x] = input[y * input_stride + x] *
47
55.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
55.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
55.4k
      }
50
27.7k
    }
51
27.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
27.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
27.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
27.7k
                                  scratch_space);
64
27.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.1k
                                   const size_t output_stride, float* scratch) {
40
19.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.1k
  float* block = scratch;
43
19.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.1k
  } else {
52
57.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
114k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
76.6k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
76.6k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
76.6k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
76.6k
      }
58
38.3k
    }
59
19.1k
  }
60
61
19.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.1k
                                  scratch_space);
64
19.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.57k
                                   const size_t output_stride, float* scratch) {
40
5.57k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.57k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.57k
  float* block = scratch;
43
5.57k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
5.57k
  } else {
52
16.7k
    for (size_t y = 0; y < LF_COLS; y++) {
53
55.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
44.5k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
44.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
44.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
44.5k
      }
58
11.1k
    }
59
5.57k
  }
60
61
5.57k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.57k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.57k
                                  scratch_space);
64
5.57k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.93k
                                   const size_t output_stride, float* scratch) {
40
6.93k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.93k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.93k
  float* block = scratch;
43
6.93k
  if (ROWS < COLS) {
44
20.8k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
69.3k
      for (size_t x = 0; x < LF_COLS; x++) {
46
55.4k
        block[y * COLS + x] = input[y * input_stride + x] *
47
55.4k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
55.4k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
55.4k
      }
50
13.8k
    }
51
6.93k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.93k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.93k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.93k
                                  scratch_space);
64
6.93k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.3k
                                   const size_t output_stride, float* scratch) {
40
12.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.3k
  float* block = scratch;
43
12.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.3k
  } else {
52
61.6k
    for (size_t y = 0; y < LF_COLS; y++) {
53
246k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
197k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
197k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
197k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
197k
      }
58
49.3k
    }
59
12.3k
  }
60
61
12.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.3k
                                  scratch_space);
64
12.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
162
                                   const size_t output_stride, float* scratch) {
40
162
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
162
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
162
  float* block = scratch;
43
162
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
162
  } else {
52
810
    for (size_t y = 0; y < LF_COLS; y++) {
53
5.83k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
5.18k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
5.18k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
5.18k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
5.18k
      }
58
648
    }
59
162
  }
60
61
162
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
162
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
162
                                  scratch_space);
64
162
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
123
                                   const size_t output_stride, float* scratch) {
40
123
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
123
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
123
  float* block = scratch;
43
123
  if (ROWS < COLS) {
44
615
    for (size_t y = 0; y < LF_ROWS; y++) {
45
4.42k
      for (size_t x = 0; x < LF_COLS; x++) {
46
3.93k
        block[y * COLS + x] = input[y * input_stride + x] *
47
3.93k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
3.93k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
3.93k
      }
50
492
    }
51
123
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
123
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
123
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
123
                                  scratch_space);
64
123
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
1.23k
                                   const size_t output_stride, float* scratch) {
40
1.23k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.23k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.23k
  float* block = scratch;
43
1.23k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
1.23k
  } else {
52
11.0k
    for (size_t y = 0; y < LF_COLS; y++) {
53
88.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
78.9k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
78.9k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
78.9k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
78.9k
      }
58
9.86k
    }
59
1.23k
  }
60
61
1.23k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.23k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.23k
                                  scratch_space);
64
1.23k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
4.82M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
4.82M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
4.82M
  static_assert(S % 2 == 0, "S should be even");
70
4.82M
  float temp[kDCTBlockSize];
71
4.82M
  constexpr size_t num_2x2 = S / 2;
72
16.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
45.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
33.7M
      float c00 = block[y * 2 * stride + x * 2];
75
33.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
33.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
33.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
33.7M
      float r00 = c00 + c01 + c10 + c11;
79
33.7M
      float r01 = c00 + c01 - c10 - c11;
80
33.7M
      float r10 = c00 - c01 + c10 - c11;
81
33.7M
      float r11 = c00 - c01 - c10 + c11;
82
33.7M
      r00 *= 0.25f;
83
33.7M
      r01 *= 0.25f;
84
33.7M
      r10 *= 0.25f;
85
33.7M
      r11 *= 0.25f;
86
33.7M
      temp[y * kBlockDim + x] = r00;
87
33.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
33.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
33.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
33.7M
    }
91
11.2M
  }
92
27.3M
  for (size_t y = 0; y < S; y++) {
93
157M
    for (size_t x = 0; x < S; x++) {
94
135M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
135M
    }
96
22.5M
  }
97
4.82M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
1.03M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.13M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.30M
      float c00 = block[y * 2 * stride + x * 2];
75
3.30M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.30M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.30M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.30M
      float r00 = c00 + c01 + c10 + c11;
79
3.30M
      float r01 = c00 + c01 - c10 - c11;
80
3.30M
      float r10 = c00 - c01 + c10 - c11;
81
3.30M
      float r11 = c00 - c01 - c10 + c11;
82
3.30M
      r00 *= 0.25f;
83
3.30M
      r01 *= 0.25f;
84
3.30M
      r10 *= 0.25f;
85
3.30M
      r11 *= 0.25f;
86
3.30M
      temp[y * kBlockDim + x] = r00;
87
3.30M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.30M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.30M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.30M
    }
91
826k
  }
92
1.85M
  for (size_t y = 0; y < S; y++) {
93
14.8M
    for (size_t x = 0; x < S; x++) {
94
13.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
13.2M
    }
96
1.65M
  }
97
206k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
619k
  for (size_t y = 0; y < num_2x2; y++) {
73
1.23M
    for (size_t x = 0; x < num_2x2; x++) {
74
826k
      float c00 = block[y * 2 * stride + x * 2];
75
826k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
826k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
826k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
826k
      float r00 = c00 + c01 + c10 + c11;
79
826k
      float r01 = c00 + c01 - c10 - c11;
80
826k
      float r10 = c00 - c01 + c10 - c11;
81
826k
      float r11 = c00 - c01 - c10 + c11;
82
826k
      r00 *= 0.25f;
83
826k
      r01 *= 0.25f;
84
826k
      r10 *= 0.25f;
85
826k
      r11 *= 0.25f;
86
826k
      temp[y * kBlockDim + x] = r00;
87
826k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
826k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
826k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
826k
    }
91
413k
  }
92
1.03M
  for (size_t y = 0; y < S; y++) {
93
4.13M
    for (size_t x = 0; x < S; x++) {
94
3.30M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.30M
    }
96
826k
  }
97
206k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
413k
  for (size_t y = 0; y < num_2x2; y++) {
73
413k
    for (size_t x = 0; x < num_2x2; x++) {
74
206k
      float c00 = block[y * 2 * stride + x * 2];
75
206k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
206k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
206k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
206k
      float r00 = c00 + c01 + c10 + c11;
79
206k
      float r01 = c00 + c01 - c10 - c11;
80
206k
      float r10 = c00 - c01 + c10 - c11;
81
206k
      float r11 = c00 - c01 - c10 + c11;
82
206k
      r00 *= 0.25f;
83
206k
      r01 *= 0.25f;
84
206k
      r10 *= 0.25f;
85
206k
      r11 *= 0.25f;
86
206k
      temp[y * kBlockDim + x] = r00;
87
206k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
206k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
206k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
206k
    }
91
206k
  }
92
619k
  for (size_t y = 0; y < S; y++) {
93
1.23M
    for (size_t x = 0; x < S; x++) {
94
826k
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
826k
    }
96
413k
  }
97
206k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
1.03M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.13M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.30M
      float c00 = block[y * 2 * stride + x * 2];
75
3.30M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.30M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.30M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.30M
      float r00 = c00 + c01 + c10 + c11;
79
3.30M
      float r01 = c00 + c01 - c10 - c11;
80
3.30M
      float r10 = c00 - c01 + c10 - c11;
81
3.30M
      float r11 = c00 - c01 - c10 + c11;
82
3.30M
      r00 *= 0.25f;
83
3.30M
      r01 *= 0.25f;
84
3.30M
      r10 *= 0.25f;
85
3.30M
      r11 *= 0.25f;
86
3.30M
      temp[y * kBlockDim + x] = r00;
87
3.30M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.30M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.30M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.30M
    }
91
826k
  }
92
1.85M
  for (size_t y = 0; y < S; y++) {
93
14.8M
    for (size_t x = 0; x < S; x++) {
94
13.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
13.2M
    }
96
1.65M
  }
97
206k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
619k
  for (size_t y = 0; y < num_2x2; y++) {
73
1.23M
    for (size_t x = 0; x < num_2x2; x++) {
74
826k
      float c00 = block[y * 2 * stride + x * 2];
75
826k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
826k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
826k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
826k
      float r00 = c00 + c01 + c10 + c11;
79
826k
      float r01 = c00 + c01 - c10 - c11;
80
826k
      float r10 = c00 - c01 + c10 - c11;
81
826k
      float r11 = c00 - c01 - c10 + c11;
82
826k
      r00 *= 0.25f;
83
826k
      r01 *= 0.25f;
84
826k
      r10 *= 0.25f;
85
826k
      r11 *= 0.25f;
86
826k
      temp[y * kBlockDim + x] = r00;
87
826k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
826k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
826k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
826k
    }
91
413k
  }
92
1.03M
  for (size_t y = 0; y < S; y++) {
93
4.13M
    for (size_t x = 0; x < S; x++) {
94
3.30M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.30M
    }
96
826k
  }
97
206k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
206k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
206k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
206k
  static_assert(S % 2 == 0, "S should be even");
70
206k
  float temp[kDCTBlockSize];
71
206k
  constexpr size_t num_2x2 = S / 2;
72
413k
  for (size_t y = 0; y < num_2x2; y++) {
73
413k
    for (size_t x = 0; x < num_2x2; x++) {
74
206k
      float c00 = block[y * 2 * stride + x * 2];
75
206k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
206k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
206k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
206k
      float r00 = c00 + c01 + c10 + c11;
79
206k
      float r01 = c00 + c01 - c10 - c11;
80
206k
      float r10 = c00 - c01 + c10 - c11;
81
206k
      float r11 = c00 - c01 - c10 + c11;
82
206k
      r00 *= 0.25f;
83
206k
      r01 *= 0.25f;
84
206k
      r10 *= 0.25f;
85
206k
      r11 *= 0.25f;
86
206k
      temp[y * kBlockDim + x] = r00;
87
206k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
206k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
206k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
206k
    }
91
206k
  }
92
619k
  for (size_t y = 0; y < S; y++) {
93
1.23M
    for (size_t x = 0; x < S; x++) {
94
826k
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
826k
    }
96
413k
  }
97
206k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
5.97M
  for (size_t y = 0; y < num_2x2; y++) {
73
23.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
19.1M
      float c00 = block[y * 2 * stride + x * 2];
75
19.1M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
19.1M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
19.1M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
19.1M
      float r00 = c00 + c01 + c10 + c11;
79
19.1M
      float r01 = c00 + c01 - c10 - c11;
80
19.1M
      float r10 = c00 - c01 + c10 - c11;
81
19.1M
      float r11 = c00 - c01 - c10 + c11;
82
19.1M
      r00 *= 0.25f;
83
19.1M
      r01 *= 0.25f;
84
19.1M
      r10 *= 0.25f;
85
19.1M
      r11 *= 0.25f;
86
19.1M
      temp[y * kBlockDim + x] = r00;
87
19.1M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
19.1M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
19.1M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
19.1M
    }
91
4.77M
  }
92
10.7M
  for (size_t y = 0; y < S; y++) {
93
85.9M
    for (size_t x = 0; x < S; x++) {
94
76.4M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
76.4M
    }
96
9.55M
  }
97
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
3.58M
  for (size_t y = 0; y < num_2x2; y++) {
73
7.16M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.77M
      float c00 = block[y * 2 * stride + x * 2];
75
4.77M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.77M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.77M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.77M
      float r00 = c00 + c01 + c10 + c11;
79
4.77M
      float r01 = c00 + c01 - c10 - c11;
80
4.77M
      float r10 = c00 - c01 + c10 - c11;
81
4.77M
      float r11 = c00 - c01 - c10 + c11;
82
4.77M
      r00 *= 0.25f;
83
4.77M
      r01 *= 0.25f;
84
4.77M
      r10 *= 0.25f;
85
4.77M
      r11 *= 0.25f;
86
4.77M
      temp[y * kBlockDim + x] = r00;
87
4.77M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.77M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.77M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.77M
    }
91
2.38M
  }
92
5.97M
  for (size_t y = 0; y < S; y++) {
93
23.8M
    for (size_t x = 0; x < S; x++) {
94
19.1M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
19.1M
    }
96
4.77M
  }
97
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
2.38M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.38M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.19M
      float c00 = block[y * 2 * stride + x * 2];
75
1.19M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.19M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.19M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.19M
      float r00 = c00 + c01 + c10 + c11;
79
1.19M
      float r01 = c00 + c01 - c10 - c11;
80
1.19M
      float r10 = c00 - c01 + c10 - c11;
81
1.19M
      float r11 = c00 - c01 - c10 + c11;
82
1.19M
      r00 *= 0.25f;
83
1.19M
      r01 *= 0.25f;
84
1.19M
      r10 *= 0.25f;
85
1.19M
      r11 *= 0.25f;
86
1.19M
      temp[y * kBlockDim + x] = r00;
87
1.19M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.19M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.19M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.19M
    }
91
1.19M
  }
92
3.58M
  for (size_t y = 0; y < S; y++) {
93
7.16M
    for (size_t x = 0; x < S; x++) {
94
4.77M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.77M
    }
96
2.38M
  }
97
1.19M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
4.92M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
4.92M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
4.92M
      {
102
4.92M
          0.2500000000000000,
103
4.92M
          0.8769029297991420f,
104
4.92M
          0.0000000000000000,
105
4.92M
          0.0000000000000000,
106
4.92M
          0.0000000000000000,
107
4.92M
          -0.4105377591765233f,
108
4.92M
          0.0000000000000000,
109
4.92M
          0.0000000000000000,
110
4.92M
          0.0000000000000000,
111
4.92M
          0.0000000000000000,
112
4.92M
          0.0000000000000000,
113
4.92M
          0.0000000000000000,
114
4.92M
          0.0000000000000000,
115
4.92M
          0.0000000000000000,
116
4.92M
          0.0000000000000000,
117
4.92M
          0.0000000000000000,
118
4.92M
      },
119
4.92M
      {
120
4.92M
          0.2500000000000000,
121
4.92M
          0.2206518106944235f,
122
4.92M
          0.0000000000000000,
123
4.92M
          0.0000000000000000,
124
4.92M
          -0.7071067811865474f,
125
4.92M
          0.6235485373547691f,
126
4.92M
          0.0000000000000000,
127
4.92M
          0.0000000000000000,
128
4.92M
          0.0000000000000000,
129
4.92M
          0.0000000000000000,
130
4.92M
          0.0000000000000000,
131
4.92M
          0.0000000000000000,
132
4.92M
          0.0000000000000000,
133
4.92M
          0.0000000000000000,
134
4.92M
          0.0000000000000000,
135
4.92M
          0.0000000000000000,
136
4.92M
      },
137
4.92M
      {
138
4.92M
          0.2500000000000000,
139
4.92M
          -0.1014005039375376f,
140
4.92M
          0.4067007583026075f,
141
4.92M
          -0.2125574805828875f,
142
4.92M
          0.0000000000000000,
143
4.92M
          -0.0643507165794627f,
144
4.92M
          -0.4517556589999482f,
145
4.92M
          -0.3046847507248690f,
146
4.92M
          0.3017929516615495f,
147
4.92M
          0.4082482904638627f,
148
4.92M
          0.1747866975480809f,
149
4.92M
          -0.2110560104933578f,
150
4.92M
          -0.1426608480880726f,
151
4.92M
          -0.1381354035075859f,
152
4.92M
          -0.1743760259965107f,
153
4.92M
          0.1135498731499434f,
154
4.92M
      },
155
4.92M
      {
156
4.92M
          0.2500000000000000,
157
4.92M
          -0.1014005039375375f,
158
4.92M
          0.4444481661973445f,
159
4.92M
          0.3085497062849767f,
160
4.92M
          0.0000000000000000f,
161
4.92M
          -0.0643507165794627f,
162
4.92M
          0.1585450355184006f,
163
4.92M
          0.5112616136591823f,
164
4.92M
          0.2579236279634118f,
165
4.92M
          0.0000000000000000,
166
4.92M
          0.0812611176717539f,
167
4.92M
          0.1856718091610980f,
168
4.92M
          -0.3416446842253372f,
169
4.92M
          0.3302282550303788f,
170
4.92M
          0.0702790691196284f,
171
4.92M
          -0.0741750459581035f,
172
4.92M
      },
173
4.92M
      {
174
4.92M
          0.2500000000000000,
175
4.92M
          0.2206518106944236f,
176
4.92M
          0.0000000000000000,
177
4.92M
          0.0000000000000000,
178
4.92M
          0.7071067811865476f,
179
4.92M
          0.6235485373547694f,
180
4.92M
          0.0000000000000000,
181
4.92M
          0.0000000000000000,
182
4.92M
          0.0000000000000000,
183
4.92M
          0.0000000000000000,
184
4.92M
          0.0000000000000000,
185
4.92M
          0.0000000000000000,
186
4.92M
          0.0000000000000000,
187
4.92M
          0.0000000000000000,
188
4.92M
          0.0000000000000000,
189
4.92M
          0.0000000000000000,
190
4.92M
      },
191
4.92M
      {
192
4.92M
          0.2500000000000000,
193
4.92M
          -0.1014005039375378f,
194
4.92M
          0.0000000000000000,
195
4.92M
          0.4706702258572536f,
196
4.92M
          0.0000000000000000,
197
4.92M
          -0.0643507165794628f,
198
4.92M
          -0.0403851516082220f,
199
4.92M
          0.0000000000000000,
200
4.92M
          0.1627234014286620f,
201
4.92M
          0.0000000000000000,
202
4.92M
          0.0000000000000000,
203
4.92M
          0.0000000000000000,
204
4.92M
          0.7367497537172237f,
205
4.92M
          0.0875511500058708f,
206
4.92M
          -0.2921026642334881f,
207
4.92M
          0.1940289303259434f,
208
4.92M
      },
209
4.92M
      {
210
4.92M
          0.2500000000000000,
211
4.92M
          -0.1014005039375377f,
212
4.92M
          0.1957439937204294f,
213
4.92M
          -0.1621205195722993f,
214
4.92M
          0.0000000000000000,
215
4.92M
          -0.0643507165794628f,
216
4.92M
          0.0074182263792424f,
217
4.92M
          -0.2904801297289980f,
218
4.92M
          0.0952002265347504f,
219
4.92M
          0.0000000000000000,
220
4.92M
          -0.3675398009862027f,
221
4.92M
          0.4921585901373873f,
222
4.92M
          0.2462710772207515f,
223
4.92M
          -0.0794670660590957f,
224
4.92M
          0.3623817333531167f,
225
4.92M
          -0.4351904965232280f,
226
4.92M
      },
227
4.92M
      {
228
4.92M
          0.2500000000000000,
229
4.92M
          -0.1014005039375376f,
230
4.92M
          0.2929100136981264f,
231
4.92M
          0.0000000000000000,
232
4.92M
          0.0000000000000000,
233
4.92M
          -0.0643507165794627f,
234
4.92M
          0.3935103426921017f,
235
4.92M
          -0.0657870154914280f,
236
4.92M
          0.0000000000000000,
237
4.92M
          -0.4082482904638628f,
238
4.92M
          -0.3078822139579090f,
239
4.92M
          -0.3852501370925192f,
240
4.92M
          -0.0857401903551931f,
241
4.92M
          -0.4613374887461511f,
242
4.92M
          0.0000000000000000,
243
4.92M
          0.2191868483885747f,
244
4.92M
      },
245
4.92M
      {
246
4.92M
          0.2500000000000000,
247
4.92M
          -0.1014005039375376f,
248
4.92M
          -0.4067007583026072f,
249
4.92M
          -0.2125574805828705f,
250
4.92M
          0.0000000000000000,
251
4.92M
          -0.0643507165794627f,
252
4.92M
          -0.4517556589999464f,
253
4.92M
          0.3046847507248840f,
254
4.92M
          0.3017929516615503f,
255
4.92M
          -0.4082482904638635f,
256
4.92M
          -0.1747866975480813f,
257
4.92M
          0.2110560104933581f,
258
4.92M
          -0.1426608480880734f,
259
4.92M
          -0.1381354035075829f,
260
4.92M
          -0.1743760259965108f,
261
4.92M
          0.1135498731499426f,
262
4.92M
      },
263
4.92M
      {
264
4.92M
          0.2500000000000000,
265
4.92M
          -0.1014005039375377f,
266
4.92M
          -0.1957439937204287f,
267
4.92M
          -0.1621205195722833f,
268
4.92M
          0.0000000000000000,
269
4.92M
          -0.0643507165794628f,
270
4.92M
          0.0074182263792444f,
271
4.92M
          0.2904801297290076f,
272
4.92M
          0.0952002265347505f,
273
4.92M
          0.0000000000000000,
274
4.92M
          0.3675398009862011f,
275
4.92M
          -0.4921585901373891f,
276
4.92M
          0.2462710772207514f,
277
4.92M
          -0.0794670660591026f,
278
4.92M
          0.3623817333531165f,
279
4.92M
          -0.4351904965232251f,
280
4.92M
      },
281
4.92M
      {
282
4.92M
          0.2500000000000000,
283
4.92M
          -0.1014005039375375f,
284
4.92M
          0.0000000000000000,
285
4.92M
          -0.4706702258572528f,
286
4.92M
          0.0000000000000000,
287
4.92M
          -0.0643507165794627f,
288
4.92M
          0.1107416575309343f,
289
4.92M
          0.0000000000000000,
290
4.92M
          -0.1627234014286617f,
291
4.92M
          0.0000000000000000,
292
4.92M
          0.0000000000000000,
293
4.92M
          0.0000000000000000,
294
4.92M
          0.1488339922711357f,
295
4.92M
          0.4972464710953509f,
296
4.92M
          0.2921026642334879f,
297
4.92M
          0.5550443808910661f,
298
4.92M
      },
299
4.92M
      {
300
4.92M
          0.2500000000000000,
301
4.92M
          -0.1014005039375377f,
302
4.92M
          0.1137907446044809f,
303
4.92M
          -0.1464291867126764f,
304
4.92M
          0.0000000000000000,
305
4.92M
          -0.0643507165794628f,
306
4.92M
          0.0829816309488205f,
307
4.92M
          -0.2388977352334460f,
308
4.92M
          -0.3531238544981630f,
309
4.92M
          -0.4082482904638630f,
310
4.92M
          0.4826689115059883f,
311
4.92M
          0.1741941265991622f,
312
4.92M
          -0.0476868035022925f,
313
4.92M
          0.1253805944856366f,
314
4.92M
          -0.4326608024727445f,
315
4.92M
          -0.2546827712406646f,
316
4.92M
      },
317
4.92M
      {
318
4.92M
          0.2500000000000000,
319
4.92M
          -0.1014005039375377f,
320
4.92M
          -0.4444481661973438f,
321
4.92M
          0.3085497062849487f,
322
4.92M
          0.0000000000000000,
323
4.92M
          -0.0643507165794628f,
324
4.92M
          0.1585450355183970f,
325
4.92M
          -0.5112616136592012f,
326
4.92M
          0.2579236279634129f,
327
4.92M
          0.0000000000000000,
328
4.92M
          -0.0812611176717504f,
329
4.92M
          -0.1856718091610990f,
330
4.92M
          -0.3416446842253373f,
331
4.92M
          0.3302282550303805f,
332
4.92M
          0.0702790691196282f,
333
4.92M
          -0.0741750459581023f,
334
4.92M
      },
335
4.92M
      {
336
4.92M
          0.2500000000000000,
337
4.92M
          -0.1014005039375376f,
338
4.92M
          -0.2929100136981264f,
339
4.92M
          0.0000000000000000,
340
4.92M
          0.0000000000000000,
341
4.92M
          -0.0643507165794627f,
342
4.92M
          0.3935103426921022f,
343
4.92M
          0.0657870154914254f,
344
4.92M
          0.0000000000000000,
345
4.92M
          0.4082482904638634f,
346
4.92M
          0.3078822139579031f,
347
4.92M
          0.3852501370925211f,
348
4.92M
          -0.0857401903551927f,
349
4.92M
          -0.4613374887461554f,
350
4.92M
          0.0000000000000000,
351
4.92M
          0.2191868483885728f,
352
4.92M
      },
353
4.92M
      {
354
4.92M
          0.2500000000000000,
355
4.92M
          -0.1014005039375376f,
356
4.92M
          -0.1137907446044814f,
357
4.92M
          -0.1464291867126654f,
358
4.92M
          0.0000000000000000,
359
4.92M
          -0.0643507165794627f,
360
4.92M
          0.0829816309488214f,
361
4.92M
          0.2388977352334547f,
362
4.92M
          -0.3531238544981624f,
363
4.92M
          0.4082482904638630f,
364
4.92M
          -0.4826689115059858f,
365
4.92M
          -0.1741941265991621f,
366
4.92M
          -0.0476868035022928f,
367
4.92M
          0.1253805944856431f,
368
4.92M
          -0.4326608024727457f,
369
4.92M
          -0.2546827712406641f,
370
4.92M
      },
371
4.92M
      {
372
4.92M
          0.2500000000000000,
373
4.92M
          -0.1014005039375374f,
374
4.92M
          0.0000000000000000,
375
4.92M
          0.4251149611657548f,
376
4.92M
          0.0000000000000000,
377
4.92M
          -0.0643507165794626f,
378
4.92M
          -0.4517556589999480f,
379
4.92M
          0.0000000000000000,
380
4.92M
          -0.6035859033230976f,
381
4.92M
          0.0000000000000000,
382
4.92M
          0.0000000000000000,
383
4.92M
          0.0000000000000000,
384
4.92M
          -0.1426608480880724f,
385
4.92M
          -0.1381354035075845f,
386
4.92M
          0.3487520519930227f,
387
4.92M
          0.1135498731499429f,
388
4.92M
      },
389
4.92M
  };
390
391
4.92M
  const HWY_CAPPED(float, 16) d;
392
14.7M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
9.84M
    auto scalar = Zero(d);
394
167M
    for (size_t j = 0; j < 16; j++) {
395
157M
      auto px = Set(d, pixels[j]);
396
157M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
157M
      scalar = MulAdd(px, basis, scalar);
398
157M
    }
399
9.84M
    Store(scalar, d, coeffs + i);
400
9.84M
  }
401
4.92M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
73.1k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
73.1k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
73.1k
      {
102
73.1k
          0.2500000000000000,
103
73.1k
          0.8769029297991420f,
104
73.1k
          0.0000000000000000,
105
73.1k
          0.0000000000000000,
106
73.1k
          0.0000000000000000,
107
73.1k
          -0.4105377591765233f,
108
73.1k
          0.0000000000000000,
109
73.1k
          0.0000000000000000,
110
73.1k
          0.0000000000000000,
111
73.1k
          0.0000000000000000,
112
73.1k
          0.0000000000000000,
113
73.1k
          0.0000000000000000,
114
73.1k
          0.0000000000000000,
115
73.1k
          0.0000000000000000,
116
73.1k
          0.0000000000000000,
117
73.1k
          0.0000000000000000,
118
73.1k
      },
119
73.1k
      {
120
73.1k
          0.2500000000000000,
121
73.1k
          0.2206518106944235f,
122
73.1k
          0.0000000000000000,
123
73.1k
          0.0000000000000000,
124
73.1k
          -0.7071067811865474f,
125
73.1k
          0.6235485373547691f,
126
73.1k
          0.0000000000000000,
127
73.1k
          0.0000000000000000,
128
73.1k
          0.0000000000000000,
129
73.1k
          0.0000000000000000,
130
73.1k
          0.0000000000000000,
131
73.1k
          0.0000000000000000,
132
73.1k
          0.0000000000000000,
133
73.1k
          0.0000000000000000,
134
73.1k
          0.0000000000000000,
135
73.1k
          0.0000000000000000,
136
73.1k
      },
137
73.1k
      {
138
73.1k
          0.2500000000000000,
139
73.1k
          -0.1014005039375376f,
140
73.1k
          0.4067007583026075f,
141
73.1k
          -0.2125574805828875f,
142
73.1k
          0.0000000000000000,
143
73.1k
          -0.0643507165794627f,
144
73.1k
          -0.4517556589999482f,
145
73.1k
          -0.3046847507248690f,
146
73.1k
          0.3017929516615495f,
147
73.1k
          0.4082482904638627f,
148
73.1k
          0.1747866975480809f,
149
73.1k
          -0.2110560104933578f,
150
73.1k
          -0.1426608480880726f,
151
73.1k
          -0.1381354035075859f,
152
73.1k
          -0.1743760259965107f,
153
73.1k
          0.1135498731499434f,
154
73.1k
      },
155
73.1k
      {
156
73.1k
          0.2500000000000000,
157
73.1k
          -0.1014005039375375f,
158
73.1k
          0.4444481661973445f,
159
73.1k
          0.3085497062849767f,
160
73.1k
          0.0000000000000000f,
161
73.1k
          -0.0643507165794627f,
162
73.1k
          0.1585450355184006f,
163
73.1k
          0.5112616136591823f,
164
73.1k
          0.2579236279634118f,
165
73.1k
          0.0000000000000000,
166
73.1k
          0.0812611176717539f,
167
73.1k
          0.1856718091610980f,
168
73.1k
          -0.3416446842253372f,
169
73.1k
          0.3302282550303788f,
170
73.1k
          0.0702790691196284f,
171
73.1k
          -0.0741750459581035f,
172
73.1k
      },
173
73.1k
      {
174
73.1k
          0.2500000000000000,
175
73.1k
          0.2206518106944236f,
176
73.1k
          0.0000000000000000,
177
73.1k
          0.0000000000000000,
178
73.1k
          0.7071067811865476f,
179
73.1k
          0.6235485373547694f,
180
73.1k
          0.0000000000000000,
181
73.1k
          0.0000000000000000,
182
73.1k
          0.0000000000000000,
183
73.1k
          0.0000000000000000,
184
73.1k
          0.0000000000000000,
185
73.1k
          0.0000000000000000,
186
73.1k
          0.0000000000000000,
187
73.1k
          0.0000000000000000,
188
73.1k
          0.0000000000000000,
189
73.1k
          0.0000000000000000,
190
73.1k
      },
191
73.1k
      {
192
73.1k
          0.2500000000000000,
193
73.1k
          -0.1014005039375378f,
194
73.1k
          0.0000000000000000,
195
73.1k
          0.4706702258572536f,
196
73.1k
          0.0000000000000000,
197
73.1k
          -0.0643507165794628f,
198
73.1k
          -0.0403851516082220f,
199
73.1k
          0.0000000000000000,
200
73.1k
          0.1627234014286620f,
201
73.1k
          0.0000000000000000,
202
73.1k
          0.0000000000000000,
203
73.1k
          0.0000000000000000,
204
73.1k
          0.7367497537172237f,
205
73.1k
          0.0875511500058708f,
206
73.1k
          -0.2921026642334881f,
207
73.1k
          0.1940289303259434f,
208
73.1k
      },
209
73.1k
      {
210
73.1k
          0.2500000000000000,
211
73.1k
          -0.1014005039375377f,
212
73.1k
          0.1957439937204294f,
213
73.1k
          -0.1621205195722993f,
214
73.1k
          0.0000000000000000,
215
73.1k
          -0.0643507165794628f,
216
73.1k
          0.0074182263792424f,
217
73.1k
          -0.2904801297289980f,
218
73.1k
          0.0952002265347504f,
219
73.1k
          0.0000000000000000,
220
73.1k
          -0.3675398009862027f,
221
73.1k
          0.4921585901373873f,
222
73.1k
          0.2462710772207515f,
223
73.1k
          -0.0794670660590957f,
224
73.1k
          0.3623817333531167f,
225
73.1k
          -0.4351904965232280f,
226
73.1k
      },
227
73.1k
      {
228
73.1k
          0.2500000000000000,
229
73.1k
          -0.1014005039375376f,
230
73.1k
          0.2929100136981264f,
231
73.1k
          0.0000000000000000,
232
73.1k
          0.0000000000000000,
233
73.1k
          -0.0643507165794627f,
234
73.1k
          0.3935103426921017f,
235
73.1k
          -0.0657870154914280f,
236
73.1k
          0.0000000000000000,
237
73.1k
          -0.4082482904638628f,
238
73.1k
          -0.3078822139579090f,
239
73.1k
          -0.3852501370925192f,
240
73.1k
          -0.0857401903551931f,
241
73.1k
          -0.4613374887461511f,
242
73.1k
          0.0000000000000000,
243
73.1k
          0.2191868483885747f,
244
73.1k
      },
245
73.1k
      {
246
73.1k
          0.2500000000000000,
247
73.1k
          -0.1014005039375376f,
248
73.1k
          -0.4067007583026072f,
249
73.1k
          -0.2125574805828705f,
250
73.1k
          0.0000000000000000,
251
73.1k
          -0.0643507165794627f,
252
73.1k
          -0.4517556589999464f,
253
73.1k
          0.3046847507248840f,
254
73.1k
          0.3017929516615503f,
255
73.1k
          -0.4082482904638635f,
256
73.1k
          -0.1747866975480813f,
257
73.1k
          0.2110560104933581f,
258
73.1k
          -0.1426608480880734f,
259
73.1k
          -0.1381354035075829f,
260
73.1k
          -0.1743760259965108f,
261
73.1k
          0.1135498731499426f,
262
73.1k
      },
263
73.1k
      {
264
73.1k
          0.2500000000000000,
265
73.1k
          -0.1014005039375377f,
266
73.1k
          -0.1957439937204287f,
267
73.1k
          -0.1621205195722833f,
268
73.1k
          0.0000000000000000,
269
73.1k
          -0.0643507165794628f,
270
73.1k
          0.0074182263792444f,
271
73.1k
          0.2904801297290076f,
272
73.1k
          0.0952002265347505f,
273
73.1k
          0.0000000000000000,
274
73.1k
          0.3675398009862011f,
275
73.1k
          -0.4921585901373891f,
276
73.1k
          0.2462710772207514f,
277
73.1k
          -0.0794670660591026f,
278
73.1k
          0.3623817333531165f,
279
73.1k
          -0.4351904965232251f,
280
73.1k
      },
281
73.1k
      {
282
73.1k
          0.2500000000000000,
283
73.1k
          -0.1014005039375375f,
284
73.1k
          0.0000000000000000,
285
73.1k
          -0.4706702258572528f,
286
73.1k
          0.0000000000000000,
287
73.1k
          -0.0643507165794627f,
288
73.1k
          0.1107416575309343f,
289
73.1k
          0.0000000000000000,
290
73.1k
          -0.1627234014286617f,
291
73.1k
          0.0000000000000000,
292
73.1k
          0.0000000000000000,
293
73.1k
          0.0000000000000000,
294
73.1k
          0.1488339922711357f,
295
73.1k
          0.4972464710953509f,
296
73.1k
          0.2921026642334879f,
297
73.1k
          0.5550443808910661f,
298
73.1k
      },
299
73.1k
      {
300
73.1k
          0.2500000000000000,
301
73.1k
          -0.1014005039375377f,
302
73.1k
          0.1137907446044809f,
303
73.1k
          -0.1464291867126764f,
304
73.1k
          0.0000000000000000,
305
73.1k
          -0.0643507165794628f,
306
73.1k
          0.0829816309488205f,
307
73.1k
          -0.2388977352334460f,
308
73.1k
          -0.3531238544981630f,
309
73.1k
          -0.4082482904638630f,
310
73.1k
          0.4826689115059883f,
311
73.1k
          0.1741941265991622f,
312
73.1k
          -0.0476868035022925f,
313
73.1k
          0.1253805944856366f,
314
73.1k
          -0.4326608024727445f,
315
73.1k
          -0.2546827712406646f,
316
73.1k
      },
317
73.1k
      {
318
73.1k
          0.2500000000000000,
319
73.1k
          -0.1014005039375377f,
320
73.1k
          -0.4444481661973438f,
321
73.1k
          0.3085497062849487f,
322
73.1k
          0.0000000000000000,
323
73.1k
          -0.0643507165794628f,
324
73.1k
          0.1585450355183970f,
325
73.1k
          -0.5112616136592012f,
326
73.1k
          0.2579236279634129f,
327
73.1k
          0.0000000000000000,
328
73.1k
          -0.0812611176717504f,
329
73.1k
          -0.1856718091610990f,
330
73.1k
          -0.3416446842253373f,
331
73.1k
          0.3302282550303805f,
332
73.1k
          0.0702790691196282f,
333
73.1k
          -0.0741750459581023f,
334
73.1k
      },
335
73.1k
      {
336
73.1k
          0.2500000000000000,
337
73.1k
          -0.1014005039375376f,
338
73.1k
          -0.2929100136981264f,
339
73.1k
          0.0000000000000000,
340
73.1k
          0.0000000000000000,
341
73.1k
          -0.0643507165794627f,
342
73.1k
          0.3935103426921022f,
343
73.1k
          0.0657870154914254f,
344
73.1k
          0.0000000000000000,
345
73.1k
          0.4082482904638634f,
346
73.1k
          0.3078822139579031f,
347
73.1k
          0.3852501370925211f,
348
73.1k
          -0.0857401903551927f,
349
73.1k
          -0.4613374887461554f,
350
73.1k
          0.0000000000000000,
351
73.1k
          0.2191868483885728f,
352
73.1k
      },
353
73.1k
      {
354
73.1k
          0.2500000000000000,
355
73.1k
          -0.1014005039375376f,
356
73.1k
          -0.1137907446044814f,
357
73.1k
          -0.1464291867126654f,
358
73.1k
          0.0000000000000000,
359
73.1k
          -0.0643507165794627f,
360
73.1k
          0.0829816309488214f,
361
73.1k
          0.2388977352334547f,
362
73.1k
          -0.3531238544981624f,
363
73.1k
          0.4082482904638630f,
364
73.1k
          -0.4826689115059858f,
365
73.1k
          -0.1741941265991621f,
366
73.1k
          -0.0476868035022928f,
367
73.1k
          0.1253805944856431f,
368
73.1k
          -0.4326608024727457f,
369
73.1k
          -0.2546827712406641f,
370
73.1k
      },
371
73.1k
      {
372
73.1k
          0.2500000000000000,
373
73.1k
          -0.1014005039375374f,
374
73.1k
          0.0000000000000000,
375
73.1k
          0.4251149611657548f,
376
73.1k
          0.0000000000000000,
377
73.1k
          -0.0643507165794626f,
378
73.1k
          -0.4517556589999480f,
379
73.1k
          0.0000000000000000,
380
73.1k
          -0.6035859033230976f,
381
73.1k
          0.0000000000000000,
382
73.1k
          0.0000000000000000,
383
73.1k
          0.0000000000000000,
384
73.1k
          -0.1426608480880724f,
385
73.1k
          -0.1381354035075845f,
386
73.1k
          0.3487520519930227f,
387
73.1k
          0.1135498731499429f,
388
73.1k
      },
389
73.1k
  };
390
391
73.1k
  const HWY_CAPPED(float, 16) d;
392
219k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
146k
    auto scalar = Zero(d);
394
2.48M
    for (size_t j = 0; j < 16; j++) {
395
2.33M
      auto px = Set(d, pixels[j]);
396
2.33M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.33M
      scalar = MulAdd(px, basis, scalar);
398
2.33M
    }
399
146k
    Store(scalar, d, coeffs + i);
400
146k
  }
401
73.1k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
73.1k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
73.1k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
73.1k
      {
102
73.1k
          0.2500000000000000,
103
73.1k
          0.8769029297991420f,
104
73.1k
          0.0000000000000000,
105
73.1k
          0.0000000000000000,
106
73.1k
          0.0000000000000000,
107
73.1k
          -0.4105377591765233f,
108
73.1k
          0.0000000000000000,
109
73.1k
          0.0000000000000000,
110
73.1k
          0.0000000000000000,
111
73.1k
          0.0000000000000000,
112
73.1k
          0.0000000000000000,
113
73.1k
          0.0000000000000000,
114
73.1k
          0.0000000000000000,
115
73.1k
          0.0000000000000000,
116
73.1k
          0.0000000000000000,
117
73.1k
          0.0000000000000000,
118
73.1k
      },
119
73.1k
      {
120
73.1k
          0.2500000000000000,
121
73.1k
          0.2206518106944235f,
122
73.1k
          0.0000000000000000,
123
73.1k
          0.0000000000000000,
124
73.1k
          -0.7071067811865474f,
125
73.1k
          0.6235485373547691f,
126
73.1k
          0.0000000000000000,
127
73.1k
          0.0000000000000000,
128
73.1k
          0.0000000000000000,
129
73.1k
          0.0000000000000000,
130
73.1k
          0.0000000000000000,
131
73.1k
          0.0000000000000000,
132
73.1k
          0.0000000000000000,
133
73.1k
          0.0000000000000000,
134
73.1k
          0.0000000000000000,
135
73.1k
          0.0000000000000000,
136
73.1k
      },
137
73.1k
      {
138
73.1k
          0.2500000000000000,
139
73.1k
          -0.1014005039375376f,
140
73.1k
          0.4067007583026075f,
141
73.1k
          -0.2125574805828875f,
142
73.1k
          0.0000000000000000,
143
73.1k
          -0.0643507165794627f,
144
73.1k
          -0.4517556589999482f,
145
73.1k
          -0.3046847507248690f,
146
73.1k
          0.3017929516615495f,
147
73.1k
          0.4082482904638627f,
148
73.1k
          0.1747866975480809f,
149
73.1k
          -0.2110560104933578f,
150
73.1k
          -0.1426608480880726f,
151
73.1k
          -0.1381354035075859f,
152
73.1k
          -0.1743760259965107f,
153
73.1k
          0.1135498731499434f,
154
73.1k
      },
155
73.1k
      {
156
73.1k
          0.2500000000000000,
157
73.1k
          -0.1014005039375375f,
158
73.1k
          0.4444481661973445f,
159
73.1k
          0.3085497062849767f,
160
73.1k
          0.0000000000000000f,
161
73.1k
          -0.0643507165794627f,
162
73.1k
          0.1585450355184006f,
163
73.1k
          0.5112616136591823f,
164
73.1k
          0.2579236279634118f,
165
73.1k
          0.0000000000000000,
166
73.1k
          0.0812611176717539f,
167
73.1k
          0.1856718091610980f,
168
73.1k
          -0.3416446842253372f,
169
73.1k
          0.3302282550303788f,
170
73.1k
          0.0702790691196284f,
171
73.1k
          -0.0741750459581035f,
172
73.1k
      },
173
73.1k
      {
174
73.1k
          0.2500000000000000,
175
73.1k
          0.2206518106944236f,
176
73.1k
          0.0000000000000000,
177
73.1k
          0.0000000000000000,
178
73.1k
          0.7071067811865476f,
179
73.1k
          0.6235485373547694f,
180
73.1k
          0.0000000000000000,
181
73.1k
          0.0000000000000000,
182
73.1k
          0.0000000000000000,
183
73.1k
          0.0000000000000000,
184
73.1k
          0.0000000000000000,
185
73.1k
          0.0000000000000000,
186
73.1k
          0.0000000000000000,
187
73.1k
          0.0000000000000000,
188
73.1k
          0.0000000000000000,
189
73.1k
          0.0000000000000000,
190
73.1k
      },
191
73.1k
      {
192
73.1k
          0.2500000000000000,
193
73.1k
          -0.1014005039375378f,
194
73.1k
          0.0000000000000000,
195
73.1k
          0.4706702258572536f,
196
73.1k
          0.0000000000000000,
197
73.1k
          -0.0643507165794628f,
198
73.1k
          -0.0403851516082220f,
199
73.1k
          0.0000000000000000,
200
73.1k
          0.1627234014286620f,
201
73.1k
          0.0000000000000000,
202
73.1k
          0.0000000000000000,
203
73.1k
          0.0000000000000000,
204
73.1k
          0.7367497537172237f,
205
73.1k
          0.0875511500058708f,
206
73.1k
          -0.2921026642334881f,
207
73.1k
          0.1940289303259434f,
208
73.1k
      },
209
73.1k
      {
210
73.1k
          0.2500000000000000,
211
73.1k
          -0.1014005039375377f,
212
73.1k
          0.1957439937204294f,
213
73.1k
          -0.1621205195722993f,
214
73.1k
          0.0000000000000000,
215
73.1k
          -0.0643507165794628f,
216
73.1k
          0.0074182263792424f,
217
73.1k
          -0.2904801297289980f,
218
73.1k
          0.0952002265347504f,
219
73.1k
          0.0000000000000000,
220
73.1k
          -0.3675398009862027f,
221
73.1k
          0.4921585901373873f,
222
73.1k
          0.2462710772207515f,
223
73.1k
          -0.0794670660590957f,
224
73.1k
          0.3623817333531167f,
225
73.1k
          -0.4351904965232280f,
226
73.1k
      },
227
73.1k
      {
228
73.1k
          0.2500000000000000,
229
73.1k
          -0.1014005039375376f,
230
73.1k
          0.2929100136981264f,
231
73.1k
          0.0000000000000000,
232
73.1k
          0.0000000000000000,
233
73.1k
          -0.0643507165794627f,
234
73.1k
          0.3935103426921017f,
235
73.1k
          -0.0657870154914280f,
236
73.1k
          0.0000000000000000,
237
73.1k
          -0.4082482904638628f,
238
73.1k
          -0.3078822139579090f,
239
73.1k
          -0.3852501370925192f,
240
73.1k
          -0.0857401903551931f,
241
73.1k
          -0.4613374887461511f,
242
73.1k
          0.0000000000000000,
243
73.1k
          0.2191868483885747f,
244
73.1k
      },
245
73.1k
      {
246
73.1k
          0.2500000000000000,
247
73.1k
          -0.1014005039375376f,
248
73.1k
          -0.4067007583026072f,
249
73.1k
          -0.2125574805828705f,
250
73.1k
          0.0000000000000000,
251
73.1k
          -0.0643507165794627f,
252
73.1k
          -0.4517556589999464f,
253
73.1k
          0.3046847507248840f,
254
73.1k
          0.3017929516615503f,
255
73.1k
          -0.4082482904638635f,
256
73.1k
          -0.1747866975480813f,
257
73.1k
          0.2110560104933581f,
258
73.1k
          -0.1426608480880734f,
259
73.1k
          -0.1381354035075829f,
260
73.1k
          -0.1743760259965108f,
261
73.1k
          0.1135498731499426f,
262
73.1k
      },
263
73.1k
      {
264
73.1k
          0.2500000000000000,
265
73.1k
          -0.1014005039375377f,
266
73.1k
          -0.1957439937204287f,
267
73.1k
          -0.1621205195722833f,
268
73.1k
          0.0000000000000000,
269
73.1k
          -0.0643507165794628f,
270
73.1k
          0.0074182263792444f,
271
73.1k
          0.2904801297290076f,
272
73.1k
          0.0952002265347505f,
273
73.1k
          0.0000000000000000,
274
73.1k
          0.3675398009862011f,
275
73.1k
          -0.4921585901373891f,
276
73.1k
          0.2462710772207514f,
277
73.1k
          -0.0794670660591026f,
278
73.1k
          0.3623817333531165f,
279
73.1k
          -0.4351904965232251f,
280
73.1k
      },
281
73.1k
      {
282
73.1k
          0.2500000000000000,
283
73.1k
          -0.1014005039375375f,
284
73.1k
          0.0000000000000000,
285
73.1k
          -0.4706702258572528f,
286
73.1k
          0.0000000000000000,
287
73.1k
          -0.0643507165794627f,
288
73.1k
          0.1107416575309343f,
289
73.1k
          0.0000000000000000,
290
73.1k
          -0.1627234014286617f,
291
73.1k
          0.0000000000000000,
292
73.1k
          0.0000000000000000,
293
73.1k
          0.0000000000000000,
294
73.1k
          0.1488339922711357f,
295
73.1k
          0.4972464710953509f,
296
73.1k
          0.2921026642334879f,
297
73.1k
          0.5550443808910661f,
298
73.1k
      },
299
73.1k
      {
300
73.1k
          0.2500000000000000,
301
73.1k
          -0.1014005039375377f,
302
73.1k
          0.1137907446044809f,
303
73.1k
          -0.1464291867126764f,
304
73.1k
          0.0000000000000000,
305
73.1k
          -0.0643507165794628f,
306
73.1k
          0.0829816309488205f,
307
73.1k
          -0.2388977352334460f,
308
73.1k
          -0.3531238544981630f,
309
73.1k
          -0.4082482904638630f,
310
73.1k
          0.4826689115059883f,
311
73.1k
          0.1741941265991622f,
312
73.1k
          -0.0476868035022925f,
313
73.1k
          0.1253805944856366f,
314
73.1k
          -0.4326608024727445f,
315
73.1k
          -0.2546827712406646f,
316
73.1k
      },
317
73.1k
      {
318
73.1k
          0.2500000000000000,
319
73.1k
          -0.1014005039375377f,
320
73.1k
          -0.4444481661973438f,
321
73.1k
          0.3085497062849487f,
322
73.1k
          0.0000000000000000,
323
73.1k
          -0.0643507165794628f,
324
73.1k
          0.1585450355183970f,
325
73.1k
          -0.5112616136592012f,
326
73.1k
          0.2579236279634129f,
327
73.1k
          0.0000000000000000,
328
73.1k
          -0.0812611176717504f,
329
73.1k
          -0.1856718091610990f,
330
73.1k
          -0.3416446842253373f,
331
73.1k
          0.3302282550303805f,
332
73.1k
          0.0702790691196282f,
333
73.1k
          -0.0741750459581023f,
334
73.1k
      },
335
73.1k
      {
336
73.1k
          0.2500000000000000,
337
73.1k
          -0.1014005039375376f,
338
73.1k
          -0.2929100136981264f,
339
73.1k
          0.0000000000000000,
340
73.1k
          0.0000000000000000,
341
73.1k
          -0.0643507165794627f,
342
73.1k
          0.3935103426921022f,
343
73.1k
          0.0657870154914254f,
344
73.1k
          0.0000000000000000,
345
73.1k
          0.4082482904638634f,
346
73.1k
          0.3078822139579031f,
347
73.1k
          0.3852501370925211f,
348
73.1k
          -0.0857401903551927f,
349
73.1k
          -0.4613374887461554f,
350
73.1k
          0.0000000000000000,
351
73.1k
          0.2191868483885728f,
352
73.1k
      },
353
73.1k
      {
354
73.1k
          0.2500000000000000,
355
73.1k
          -0.1014005039375376f,
356
73.1k
          -0.1137907446044814f,
357
73.1k
          -0.1464291867126654f,
358
73.1k
          0.0000000000000000,
359
73.1k
          -0.0643507165794627f,
360
73.1k
          0.0829816309488214f,
361
73.1k
          0.2388977352334547f,
362
73.1k
          -0.3531238544981624f,
363
73.1k
          0.4082482904638630f,
364
73.1k
          -0.4826689115059858f,
365
73.1k
          -0.1741941265991621f,
366
73.1k
          -0.0476868035022928f,
367
73.1k
          0.1253805944856431f,
368
73.1k
          -0.4326608024727457f,
369
73.1k
          -0.2546827712406641f,
370
73.1k
      },
371
73.1k
      {
372
73.1k
          0.2500000000000000,
373
73.1k
          -0.1014005039375374f,
374
73.1k
          0.0000000000000000,
375
73.1k
          0.4251149611657548f,
376
73.1k
          0.0000000000000000,
377
73.1k
          -0.0643507165794626f,
378
73.1k
          -0.4517556589999480f,
379
73.1k
          0.0000000000000000,
380
73.1k
          -0.6035859033230976f,
381
73.1k
          0.0000000000000000,
382
73.1k
          0.0000000000000000,
383
73.1k
          0.0000000000000000,
384
73.1k
          -0.1426608480880724f,
385
73.1k
          -0.1381354035075845f,
386
73.1k
          0.3487520519930227f,
387
73.1k
          0.1135498731499429f,
388
73.1k
      },
389
73.1k
  };
390
391
73.1k
  const HWY_CAPPED(float, 16) d;
392
219k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
146k
    auto scalar = Zero(d);
394
2.48M
    for (size_t j = 0; j < 16; j++) {
395
2.33M
      auto px = Set(d, pixels[j]);
396
2.33M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.33M
      scalar = MulAdd(px, basis, scalar);
398
2.33M
    }
399
146k
    Store(scalar, d, coeffs + i);
400
146k
  }
401
73.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
4.77M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
4.77M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
4.77M
      {
102
4.77M
          0.2500000000000000,
103
4.77M
          0.8769029297991420f,
104
4.77M
          0.0000000000000000,
105
4.77M
          0.0000000000000000,
106
4.77M
          0.0000000000000000,
107
4.77M
          -0.4105377591765233f,
108
4.77M
          0.0000000000000000,
109
4.77M
          0.0000000000000000,
110
4.77M
          0.0000000000000000,
111
4.77M
          0.0000000000000000,
112
4.77M
          0.0000000000000000,
113
4.77M
          0.0000000000000000,
114
4.77M
          0.0000000000000000,
115
4.77M
          0.0000000000000000,
116
4.77M
          0.0000000000000000,
117
4.77M
          0.0000000000000000,
118
4.77M
      },
119
4.77M
      {
120
4.77M
          0.2500000000000000,
121
4.77M
          0.2206518106944235f,
122
4.77M
          0.0000000000000000,
123
4.77M
          0.0000000000000000,
124
4.77M
          -0.7071067811865474f,
125
4.77M
          0.6235485373547691f,
126
4.77M
          0.0000000000000000,
127
4.77M
          0.0000000000000000,
128
4.77M
          0.0000000000000000,
129
4.77M
          0.0000000000000000,
130
4.77M
          0.0000000000000000,
131
4.77M
          0.0000000000000000,
132
4.77M
          0.0000000000000000,
133
4.77M
          0.0000000000000000,
134
4.77M
          0.0000000000000000,
135
4.77M
          0.0000000000000000,
136
4.77M
      },
137
4.77M
      {
138
4.77M
          0.2500000000000000,
139
4.77M
          -0.1014005039375376f,
140
4.77M
          0.4067007583026075f,
141
4.77M
          -0.2125574805828875f,
142
4.77M
          0.0000000000000000,
143
4.77M
          -0.0643507165794627f,
144
4.77M
          -0.4517556589999482f,
145
4.77M
          -0.3046847507248690f,
146
4.77M
          0.3017929516615495f,
147
4.77M
          0.4082482904638627f,
148
4.77M
          0.1747866975480809f,
149
4.77M
          -0.2110560104933578f,
150
4.77M
          -0.1426608480880726f,
151
4.77M
          -0.1381354035075859f,
152
4.77M
          -0.1743760259965107f,
153
4.77M
          0.1135498731499434f,
154
4.77M
      },
155
4.77M
      {
156
4.77M
          0.2500000000000000,
157
4.77M
          -0.1014005039375375f,
158
4.77M
          0.4444481661973445f,
159
4.77M
          0.3085497062849767f,
160
4.77M
          0.0000000000000000f,
161
4.77M
          -0.0643507165794627f,
162
4.77M
          0.1585450355184006f,
163
4.77M
          0.5112616136591823f,
164
4.77M
          0.2579236279634118f,
165
4.77M
          0.0000000000000000,
166
4.77M
          0.0812611176717539f,
167
4.77M
          0.1856718091610980f,
168
4.77M
          -0.3416446842253372f,
169
4.77M
          0.3302282550303788f,
170
4.77M
          0.0702790691196284f,
171
4.77M
          -0.0741750459581035f,
172
4.77M
      },
173
4.77M
      {
174
4.77M
          0.2500000000000000,
175
4.77M
          0.2206518106944236f,
176
4.77M
          0.0000000000000000,
177
4.77M
          0.0000000000000000,
178
4.77M
          0.7071067811865476f,
179
4.77M
          0.6235485373547694f,
180
4.77M
          0.0000000000000000,
181
4.77M
          0.0000000000000000,
182
4.77M
          0.0000000000000000,
183
4.77M
          0.0000000000000000,
184
4.77M
          0.0000000000000000,
185
4.77M
          0.0000000000000000,
186
4.77M
          0.0000000000000000,
187
4.77M
          0.0000000000000000,
188
4.77M
          0.0000000000000000,
189
4.77M
          0.0000000000000000,
190
4.77M
      },
191
4.77M
      {
192
4.77M
          0.2500000000000000,
193
4.77M
          -0.1014005039375378f,
194
4.77M
          0.0000000000000000,
195
4.77M
          0.4706702258572536f,
196
4.77M
          0.0000000000000000,
197
4.77M
          -0.0643507165794628f,
198
4.77M
          -0.0403851516082220f,
199
4.77M
          0.0000000000000000,
200
4.77M
          0.1627234014286620f,
201
4.77M
          0.0000000000000000,
202
4.77M
          0.0000000000000000,
203
4.77M
          0.0000000000000000,
204
4.77M
          0.7367497537172237f,
205
4.77M
          0.0875511500058708f,
206
4.77M
          -0.2921026642334881f,
207
4.77M
          0.1940289303259434f,
208
4.77M
      },
209
4.77M
      {
210
4.77M
          0.2500000000000000,
211
4.77M
          -0.1014005039375377f,
212
4.77M
          0.1957439937204294f,
213
4.77M
          -0.1621205195722993f,
214
4.77M
          0.0000000000000000,
215
4.77M
          -0.0643507165794628f,
216
4.77M
          0.0074182263792424f,
217
4.77M
          -0.2904801297289980f,
218
4.77M
          0.0952002265347504f,
219
4.77M
          0.0000000000000000,
220
4.77M
          -0.3675398009862027f,
221
4.77M
          0.4921585901373873f,
222
4.77M
          0.2462710772207515f,
223
4.77M
          -0.0794670660590957f,
224
4.77M
          0.3623817333531167f,
225
4.77M
          -0.4351904965232280f,
226
4.77M
      },
227
4.77M
      {
228
4.77M
          0.2500000000000000,
229
4.77M
          -0.1014005039375376f,
230
4.77M
          0.2929100136981264f,
231
4.77M
          0.0000000000000000,
232
4.77M
          0.0000000000000000,
233
4.77M
          -0.0643507165794627f,
234
4.77M
          0.3935103426921017f,
235
4.77M
          -0.0657870154914280f,
236
4.77M
          0.0000000000000000,
237
4.77M
          -0.4082482904638628f,
238
4.77M
          -0.3078822139579090f,
239
4.77M
          -0.3852501370925192f,
240
4.77M
          -0.0857401903551931f,
241
4.77M
          -0.4613374887461511f,
242
4.77M
          0.0000000000000000,
243
4.77M
          0.2191868483885747f,
244
4.77M
      },
245
4.77M
      {
246
4.77M
          0.2500000000000000,
247
4.77M
          -0.1014005039375376f,
248
4.77M
          -0.4067007583026072f,
249
4.77M
          -0.2125574805828705f,
250
4.77M
          0.0000000000000000,
251
4.77M
          -0.0643507165794627f,
252
4.77M
          -0.4517556589999464f,
253
4.77M
          0.3046847507248840f,
254
4.77M
          0.3017929516615503f,
255
4.77M
          -0.4082482904638635f,
256
4.77M
          -0.1747866975480813f,
257
4.77M
          0.2110560104933581f,
258
4.77M
          -0.1426608480880734f,
259
4.77M
          -0.1381354035075829f,
260
4.77M
          -0.1743760259965108f,
261
4.77M
          0.1135498731499426f,
262
4.77M
      },
263
4.77M
      {
264
4.77M
          0.2500000000000000,
265
4.77M
          -0.1014005039375377f,
266
4.77M
          -0.1957439937204287f,
267
4.77M
          -0.1621205195722833f,
268
4.77M
          0.0000000000000000,
269
4.77M
          -0.0643507165794628f,
270
4.77M
          0.0074182263792444f,
271
4.77M
          0.2904801297290076f,
272
4.77M
          0.0952002265347505f,
273
4.77M
          0.0000000000000000,
274
4.77M
          0.3675398009862011f,
275
4.77M
          -0.4921585901373891f,
276
4.77M
          0.2462710772207514f,
277
4.77M
          -0.0794670660591026f,
278
4.77M
          0.3623817333531165f,
279
4.77M
          -0.4351904965232251f,
280
4.77M
      },
281
4.77M
      {
282
4.77M
          0.2500000000000000,
283
4.77M
          -0.1014005039375375f,
284
4.77M
          0.0000000000000000,
285
4.77M
          -0.4706702258572528f,
286
4.77M
          0.0000000000000000,
287
4.77M
          -0.0643507165794627f,
288
4.77M
          0.1107416575309343f,
289
4.77M
          0.0000000000000000,
290
4.77M
          -0.1627234014286617f,
291
4.77M
          0.0000000000000000,
292
4.77M
          0.0000000000000000,
293
4.77M
          0.0000000000000000,
294
4.77M
          0.1488339922711357f,
295
4.77M
          0.4972464710953509f,
296
4.77M
          0.2921026642334879f,
297
4.77M
          0.5550443808910661f,
298
4.77M
      },
299
4.77M
      {
300
4.77M
          0.2500000000000000,
301
4.77M
          -0.1014005039375377f,
302
4.77M
          0.1137907446044809f,
303
4.77M
          -0.1464291867126764f,
304
4.77M
          0.0000000000000000,
305
4.77M
          -0.0643507165794628f,
306
4.77M
          0.0829816309488205f,
307
4.77M
          -0.2388977352334460f,
308
4.77M
          -0.3531238544981630f,
309
4.77M
          -0.4082482904638630f,
310
4.77M
          0.4826689115059883f,
311
4.77M
          0.1741941265991622f,
312
4.77M
          -0.0476868035022925f,
313
4.77M
          0.1253805944856366f,
314
4.77M
          -0.4326608024727445f,
315
4.77M
          -0.2546827712406646f,
316
4.77M
      },
317
4.77M
      {
318
4.77M
          0.2500000000000000,
319
4.77M
          -0.1014005039375377f,
320
4.77M
          -0.4444481661973438f,
321
4.77M
          0.3085497062849487f,
322
4.77M
          0.0000000000000000,
323
4.77M
          -0.0643507165794628f,
324
4.77M
          0.1585450355183970f,
325
4.77M
          -0.5112616136592012f,
326
4.77M
          0.2579236279634129f,
327
4.77M
          0.0000000000000000,
328
4.77M
          -0.0812611176717504f,
329
4.77M
          -0.1856718091610990f,
330
4.77M
          -0.3416446842253373f,
331
4.77M
          0.3302282550303805f,
332
4.77M
          0.0702790691196282f,
333
4.77M
          -0.0741750459581023f,
334
4.77M
      },
335
4.77M
      {
336
4.77M
          0.2500000000000000,
337
4.77M
          -0.1014005039375376f,
338
4.77M
          -0.2929100136981264f,
339
4.77M
          0.0000000000000000,
340
4.77M
          0.0000000000000000,
341
4.77M
          -0.0643507165794627f,
342
4.77M
          0.3935103426921022f,
343
4.77M
          0.0657870154914254f,
344
4.77M
          0.0000000000000000,
345
4.77M
          0.4082482904638634f,
346
4.77M
          0.3078822139579031f,
347
4.77M
          0.3852501370925211f,
348
4.77M
          -0.0857401903551927f,
349
4.77M
          -0.4613374887461554f,
350
4.77M
          0.0000000000000000,
351
4.77M
          0.2191868483885728f,
352
4.77M
      },
353
4.77M
      {
354
4.77M
          0.2500000000000000,
355
4.77M
          -0.1014005039375376f,
356
4.77M
          -0.1137907446044814f,
357
4.77M
          -0.1464291867126654f,
358
4.77M
          0.0000000000000000,
359
4.77M
          -0.0643507165794627f,
360
4.77M
          0.0829816309488214f,
361
4.77M
          0.2388977352334547f,
362
4.77M
          -0.3531238544981624f,
363
4.77M
          0.4082482904638630f,
364
4.77M
          -0.4826689115059858f,
365
4.77M
          -0.1741941265991621f,
366
4.77M
          -0.0476868035022928f,
367
4.77M
          0.1253805944856431f,
368
4.77M
          -0.4326608024727457f,
369
4.77M
          -0.2546827712406641f,
370
4.77M
      },
371
4.77M
      {
372
4.77M
          0.2500000000000000,
373
4.77M
          -0.1014005039375374f,
374
4.77M
          0.0000000000000000,
375
4.77M
          0.4251149611657548f,
376
4.77M
          0.0000000000000000,
377
4.77M
          -0.0643507165794626f,
378
4.77M
          -0.4517556589999480f,
379
4.77M
          0.0000000000000000,
380
4.77M
          -0.6035859033230976f,
381
4.77M
          0.0000000000000000,
382
4.77M
          0.0000000000000000,
383
4.77M
          0.0000000000000000,
384
4.77M
          -0.1426608480880724f,
385
4.77M
          -0.1381354035075845f,
386
4.77M
          0.3487520519930227f,
387
4.77M
          0.1135498731499429f,
388
4.77M
      },
389
4.77M
  };
390
391
4.77M
  const HWY_CAPPED(float, 16) d;
392
14.3M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
9.55M
    auto scalar = Zero(d);
394
162M
    for (size_t j = 0; j < 16; j++) {
395
152M
      auto px = Set(d, pixels[j]);
396
152M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
152M
      scalar = MulAdd(px, basis, scalar);
398
152M
    }
399
9.55M
    Store(scalar, d, coeffs + i);
400
9.55M
  }
401
4.77M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
4.92M
                            float* JXL_RESTRICT coefficients) {
411
4.92M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
4.92M
  size_t afv_x = afv_kind & 1;
413
4.92M
  size_t afv_y = afv_kind / 2;
414
4.92M
  HWY_ALIGN float block[4 * 8] = {};
415
24.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
98.4M
    for (size_t ix = 0; ix < 4; ix++) {
417
78.7M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
78.7M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
78.7M
    }
420
19.6M
  }
421
  // AFV coefficients in (even, even) positions.
422
4.92M
  HWY_ALIGN float coeff[4 * 4];
423
4.92M
  AFVDCT4x4(block, coeff);
424
24.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
98.4M
    for (size_t ix = 0; ix < 4; ix++) {
426
78.7M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
78.7M
    }
428
19.6M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
4.92M
  ComputeScaledDCT<4, 4>()(
431
4.92M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
4.92M
              pixels_stride),
433
4.92M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
24.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
177M
    for (size_t ix = 0; ix < 8; ix++) {
437
157M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
157M
    }
439
19.6M
  }
440
  // 4x8 DCT of the other half of the block.
441
4.92M
  ComputeScaledDCT<4, 8>()(
442
4.92M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
4.92M
      block, scratch_space);
444
24.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
177M
    for (size_t ix = 0; ix < 8; ix++) {
446
157M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
157M
    }
448
19.6M
  }
449
4.92M
  float block00 = coefficients[0] * 0.25f;
450
4.92M
  float block01 = coefficients[1];
451
4.92M
  float block10 = coefficients[8];
452
4.92M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
4.92M
  coefficients[1] = (block00 - block01) * 0.5f;
454
4.92M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
4.92M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.0k
                            float* JXL_RESTRICT coefficients) {
411
20.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.0k
  size_t afv_x = afv_kind & 1;
413
20.0k
  size_t afv_y = afv_kind / 2;
414
20.0k
  HWY_ALIGN float block[4 * 8] = {};
415
100k
  for (size_t iy = 0; iy < 4; iy++) {
416
400k
    for (size_t ix = 0; ix < 4; ix++) {
417
320k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
320k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
320k
    }
420
80.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
20.0k
  HWY_ALIGN float coeff[4 * 4];
423
20.0k
  AFVDCT4x4(block, coeff);
424
100k
  for (size_t iy = 0; iy < 4; iy++) {
425
400k
    for (size_t ix = 0; ix < 4; ix++) {
426
320k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
320k
    }
428
80.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.0k
  ComputeScaledDCT<4, 4>()(
431
20.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.0k
              pixels_stride),
433
20.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
100k
  for (size_t iy = 0; iy < 4; iy++) {
436
720k
    for (size_t ix = 0; ix < 8; ix++) {
437
640k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
640k
    }
439
80.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
20.0k
  ComputeScaledDCT<4, 8>()(
442
20.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.0k
      block, scratch_space);
444
100k
  for (size_t iy = 0; iy < 4; iy++) {
445
720k
    for (size_t ix = 0; ix < 8; ix++) {
446
640k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
640k
    }
448
80.0k
  }
449
20.0k
  float block00 = coefficients[0] * 0.25f;
450
20.0k
  float block01 = coefficients[1];
451
20.0k
  float block10 = coefficients[8];
452
20.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.0k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.4k
                            float* JXL_RESTRICT coefficients) {
411
14.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.4k
  size_t afv_x = afv_kind & 1;
413
14.4k
  size_t afv_y = afv_kind / 2;
414
14.4k
  HWY_ALIGN float block[4 * 8] = {};
415
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
416
288k
    for (size_t ix = 0; ix < 4; ix++) {
417
230k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
230k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
230k
    }
420
57.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
14.4k
  HWY_ALIGN float coeff[4 * 4];
423
14.4k
  AFVDCT4x4(block, coeff);
424
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
425
288k
    for (size_t ix = 0; ix < 4; ix++) {
426
230k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
230k
    }
428
57.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.4k
  ComputeScaledDCT<4, 4>()(
431
14.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.4k
              pixels_stride),
433
14.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
436
519k
    for (size_t ix = 0; ix < 8; ix++) {
437
461k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
461k
    }
439
57.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
14.4k
  ComputeScaledDCT<4, 8>()(
442
14.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.4k
      block, scratch_space);
444
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
445
519k
    for (size_t ix = 0; ix < 8; ix++) {
446
461k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
461k
    }
448
57.6k
  }
449
14.4k
  float block00 = coefficients[0] * 0.25f;
450
14.4k
  float block01 = coefficients[1];
451
14.4k
  float block10 = coefficients[8];
452
14.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.6k
                            float* JXL_RESTRICT coefficients) {
411
15.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.6k
  size_t afv_x = afv_kind & 1;
413
15.6k
  size_t afv_y = afv_kind / 2;
414
15.6k
  HWY_ALIGN float block[4 * 8] = {};
415
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
313k
    for (size_t ix = 0; ix < 4; ix++) {
417
250k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
250k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
250k
    }
420
62.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
15.6k
  HWY_ALIGN float coeff[4 * 4];
423
15.6k
  AFVDCT4x4(block, coeff);
424
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
313k
    for (size_t ix = 0; ix < 4; ix++) {
426
250k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
250k
    }
428
62.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.6k
  ComputeScaledDCT<4, 4>()(
431
15.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.6k
              pixels_stride),
433
15.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
563k
    for (size_t ix = 0; ix < 8; ix++) {
437
501k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
501k
    }
439
62.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
15.6k
  ComputeScaledDCT<4, 8>()(
442
15.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.6k
      block, scratch_space);
444
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
563k
    for (size_t ix = 0; ix < 8; ix++) {
446
501k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
501k
    }
448
62.6k
  }
449
15.6k
  float block00 = coefficients[0] * 0.25f;
450
15.6k
  float block01 = coefficients[1];
451
15.6k
  float block10 = coefficients[8];
452
15.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
23.0k
                            float* JXL_RESTRICT coefficients) {
411
23.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
23.0k
  size_t afv_x = afv_kind & 1;
413
23.0k
  size_t afv_y = afv_kind / 2;
414
23.0k
  HWY_ALIGN float block[4 * 8] = {};
415
115k
  for (size_t iy = 0; iy < 4; iy++) {
416
460k
    for (size_t ix = 0; ix < 4; ix++) {
417
368k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
368k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
368k
    }
420
92.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
23.0k
  HWY_ALIGN float coeff[4 * 4];
423
23.0k
  AFVDCT4x4(block, coeff);
424
115k
  for (size_t iy = 0; iy < 4; iy++) {
425
460k
    for (size_t ix = 0; ix < 4; ix++) {
426
368k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
368k
    }
428
92.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
23.0k
  ComputeScaledDCT<4, 4>()(
431
23.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
23.0k
              pixels_stride),
433
23.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
115k
  for (size_t iy = 0; iy < 4; iy++) {
436
828k
    for (size_t ix = 0; ix < 8; ix++) {
437
736k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
736k
    }
439
92.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
23.0k
  ComputeScaledDCT<4, 8>()(
442
23.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
23.0k
      block, scratch_space);
444
115k
  for (size_t iy = 0; iy < 4; iy++) {
445
828k
    for (size_t ix = 0; ix < 8; ix++) {
446
736k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
736k
    }
448
92.0k
  }
449
23.0k
  float block00 = coefficients[0] * 0.25f;
450
23.0k
  float block01 = coefficients[1];
451
23.0k
  float block10 = coefficients[8];
452
23.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
23.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
23.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
23.0k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.0k
                            float* JXL_RESTRICT coefficients) {
411
20.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.0k
  size_t afv_x = afv_kind & 1;
413
20.0k
  size_t afv_y = afv_kind / 2;
414
20.0k
  HWY_ALIGN float block[4 * 8] = {};
415
100k
  for (size_t iy = 0; iy < 4; iy++) {
416
400k
    for (size_t ix = 0; ix < 4; ix++) {
417
320k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
320k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
320k
    }
420
80.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
20.0k
  HWY_ALIGN float coeff[4 * 4];
423
20.0k
  AFVDCT4x4(block, coeff);
424
100k
  for (size_t iy = 0; iy < 4; iy++) {
425
400k
    for (size_t ix = 0; ix < 4; ix++) {
426
320k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
320k
    }
428
80.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.0k
  ComputeScaledDCT<4, 4>()(
431
20.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.0k
              pixels_stride),
433
20.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
100k
  for (size_t iy = 0; iy < 4; iy++) {
436
720k
    for (size_t ix = 0; ix < 8; ix++) {
437
640k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
640k
    }
439
80.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
20.0k
  ComputeScaledDCT<4, 8>()(
442
20.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.0k
      block, scratch_space);
444
100k
  for (size_t iy = 0; iy < 4; iy++) {
445
720k
    for (size_t ix = 0; ix < 8; ix++) {
446
640k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
640k
    }
448
80.0k
  }
449
20.0k
  float block00 = coefficients[0] * 0.25f;
450
20.0k
  float block01 = coefficients[1];
451
20.0k
  float block10 = coefficients[8];
452
20.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.0k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.4k
                            float* JXL_RESTRICT coefficients) {
411
14.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.4k
  size_t afv_x = afv_kind & 1;
413
14.4k
  size_t afv_y = afv_kind / 2;
414
14.4k
  HWY_ALIGN float block[4 * 8] = {};
415
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
416
288k
    for (size_t ix = 0; ix < 4; ix++) {
417
230k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
230k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
230k
    }
420
57.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
14.4k
  HWY_ALIGN float coeff[4 * 4];
423
14.4k
  AFVDCT4x4(block, coeff);
424
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
425
288k
    for (size_t ix = 0; ix < 4; ix++) {
426
230k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
230k
    }
428
57.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.4k
  ComputeScaledDCT<4, 4>()(
431
14.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.4k
              pixels_stride),
433
14.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
436
519k
    for (size_t ix = 0; ix < 8; ix++) {
437
461k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
461k
    }
439
57.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
14.4k
  ComputeScaledDCT<4, 8>()(
442
14.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.4k
      block, scratch_space);
444
72.1k
  for (size_t iy = 0; iy < 4; iy++) {
445
519k
    for (size_t ix = 0; ix < 8; ix++) {
446
461k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
461k
    }
448
57.6k
  }
449
14.4k
  float block00 = coefficients[0] * 0.25f;
450
14.4k
  float block01 = coefficients[1];
451
14.4k
  float block10 = coefficients[8];
452
14.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.6k
                            float* JXL_RESTRICT coefficients) {
411
15.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.6k
  size_t afv_x = afv_kind & 1;
413
15.6k
  size_t afv_y = afv_kind / 2;
414
15.6k
  HWY_ALIGN float block[4 * 8] = {};
415
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
313k
    for (size_t ix = 0; ix < 4; ix++) {
417
250k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
250k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
250k
    }
420
62.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
15.6k
  HWY_ALIGN float coeff[4 * 4];
423
15.6k
  AFVDCT4x4(block, coeff);
424
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
313k
    for (size_t ix = 0; ix < 4; ix++) {
426
250k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
250k
    }
428
62.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.6k
  ComputeScaledDCT<4, 4>()(
431
15.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.6k
              pixels_stride),
433
15.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
563k
    for (size_t ix = 0; ix < 8; ix++) {
437
501k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
501k
    }
439
62.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
15.6k
  ComputeScaledDCT<4, 8>()(
442
15.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.6k
      block, scratch_space);
444
78.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
563k
    for (size_t ix = 0; ix < 8; ix++) {
446
501k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
501k
    }
448
62.6k
  }
449
15.6k
  float block00 = coefficients[0] * 0.25f;
450
15.6k
  float block01 = coefficients[1];
451
15.6k
  float block10 = coefficients[8];
452
15.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
23.0k
                            float* JXL_RESTRICT coefficients) {
411
23.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
23.0k
  size_t afv_x = afv_kind & 1;
413
23.0k
  size_t afv_y = afv_kind / 2;
414
23.0k
  HWY_ALIGN float block[4 * 8] = {};
415
115k
  for (size_t iy = 0; iy < 4; iy++) {
416
460k
    for (size_t ix = 0; ix < 4; ix++) {
417
368k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
368k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
368k
    }
420
92.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
23.0k
  HWY_ALIGN float coeff[4 * 4];
423
23.0k
  AFVDCT4x4(block, coeff);
424
115k
  for (size_t iy = 0; iy < 4; iy++) {
425
460k
    for (size_t ix = 0; ix < 4; ix++) {
426
368k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
368k
    }
428
92.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
23.0k
  ComputeScaledDCT<4, 4>()(
431
23.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
23.0k
              pixels_stride),
433
23.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
115k
  for (size_t iy = 0; iy < 4; iy++) {
436
828k
    for (size_t ix = 0; ix < 8; ix++) {
437
736k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
736k
    }
439
92.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
23.0k
  ComputeScaledDCT<4, 8>()(
442
23.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
23.0k
      block, scratch_space);
444
115k
  for (size_t iy = 0; iy < 4; iy++) {
445
828k
    for (size_t ix = 0; ix < 8; ix++) {
446
736k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
736k
    }
448
92.0k
  }
449
23.0k
  float block00 = coefficients[0] * 0.25f;
450
23.0k
  float block01 = coefficients[1];
451
23.0k
  float block10 = coefficients[8];
452
23.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
23.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
23.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
23.0k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.19M
                            float* JXL_RESTRICT coefficients) {
411
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.19M
  size_t afv_x = afv_kind & 1;
413
1.19M
  size_t afv_y = afv_kind / 2;
414
1.19M
  HWY_ALIGN float block[4 * 8] = {};
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
19.1M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
19.1M
    }
420
4.77M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.19M
  HWY_ALIGN float coeff[4 * 4];
423
1.19M
  AFVDCT4x4(block, coeff);
424
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
19.1M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
19.1M
    }
428
4.77M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.19M
  ComputeScaledDCT<4, 4>()(
431
1.19M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.19M
              pixels_stride),
433
1.19M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
437
38.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
38.2M
    }
439
4.77M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.19M
  ComputeScaledDCT<4, 8>()(
442
1.19M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.19M
      block, scratch_space);
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
38.2M
    }
448
4.77M
  }
449
1.19M
  float block00 = coefficients[0] * 0.25f;
450
1.19M
  float block01 = coefficients[1];
451
1.19M
  float block10 = coefficients[8];
452
1.19M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.19M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.19M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.19M
                            float* JXL_RESTRICT coefficients) {
411
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.19M
  size_t afv_x = afv_kind & 1;
413
1.19M
  size_t afv_y = afv_kind / 2;
414
1.19M
  HWY_ALIGN float block[4 * 8] = {};
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
19.1M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
19.1M
    }
420
4.77M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.19M
  HWY_ALIGN float coeff[4 * 4];
423
1.19M
  AFVDCT4x4(block, coeff);
424
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
19.1M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
19.1M
    }
428
4.77M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.19M
  ComputeScaledDCT<4, 4>()(
431
1.19M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.19M
              pixels_stride),
433
1.19M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
437
38.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
38.2M
    }
439
4.77M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.19M
  ComputeScaledDCT<4, 8>()(
442
1.19M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.19M
      block, scratch_space);
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
38.2M
    }
448
4.77M
  }
449
1.19M
  float block00 = coefficients[0] * 0.25f;
450
1.19M
  float block01 = coefficients[1];
451
1.19M
  float block10 = coefficients[8];
452
1.19M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.19M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.19M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.19M
                            float* JXL_RESTRICT coefficients) {
411
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.19M
  size_t afv_x = afv_kind & 1;
413
1.19M
  size_t afv_y = afv_kind / 2;
414
1.19M
  HWY_ALIGN float block[4 * 8] = {};
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
19.1M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
19.1M
    }
420
4.77M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.19M
  HWY_ALIGN float coeff[4 * 4];
423
1.19M
  AFVDCT4x4(block, coeff);
424
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
19.1M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
19.1M
    }
428
4.77M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.19M
  ComputeScaledDCT<4, 4>()(
431
1.19M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.19M
              pixels_stride),
433
1.19M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
437
38.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
38.2M
    }
439
4.77M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.19M
  ComputeScaledDCT<4, 8>()(
442
1.19M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.19M
      block, scratch_space);
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
38.2M
    }
448
4.77M
  }
449
1.19M
  float block00 = coefficients[0] * 0.25f;
450
1.19M
  float block01 = coefficients[1];
451
1.19M
  float block10 = coefficients[8];
452
1.19M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.19M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.19M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.19M
                            float* JXL_RESTRICT coefficients) {
411
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.19M
  size_t afv_x = afv_kind & 1;
413
1.19M
  size_t afv_y = afv_kind / 2;
414
1.19M
  HWY_ALIGN float block[4 * 8] = {};
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
19.1M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
19.1M
    }
420
4.77M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.19M
  HWY_ALIGN float coeff[4 * 4];
423
1.19M
  AFVDCT4x4(block, coeff);
424
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
19.1M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
19.1M
    }
428
4.77M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.19M
  ComputeScaledDCT<4, 4>()(
431
1.19M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.19M
              pixels_stride),
433
1.19M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
437
38.2M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
38.2M
    }
439
4.77M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.19M
  ComputeScaledDCT<4, 8>()(
442
1.19M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.19M
      block, scratch_space);
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
38.2M
    }
448
4.77M
  }
449
1.19M
  float block00 = coefficients[0] * 0.25f;
450
1.19M
  float block01 = coefficients[1];
451
1.19M
  float block10 = coefficients[8];
452
1.19M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.19M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.19M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.19M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
17.8M
                                          float* JXL_RESTRICT scratch_space) {
462
17.8M
  using Type = AcStrategyType;
463
17.8M
  switch (strategy) {
464
1.37M
    case Type::IDENTITY: {
465
4.12M
      for (size_t y = 0; y < 2; y++) {
466
8.25M
        for (size_t x = 0; x < 2; x++) {
467
5.50M
          float block_dc = 0;
468
27.5M
          for (size_t iy = 0; iy < 4; iy++) {
469
110M
            for (size_t ix = 0; ix < 4; ix++) {
470
88.0M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
88.0M
            }
472
22.0M
          }
473
5.50M
          block_dc *= 1.0f / 16;
474
27.5M
          for (size_t iy = 0; iy < 4; iy++) {
475
110M
            for (size_t ix = 0; ix < 4; ix++) {
476
88.0M
              if (ix == 1 && iy == 1) continue;
477
82.5M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
82.5M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
82.5M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
82.5M
            }
481
22.0M
          }
482
5.50M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
5.50M
          coefficients[y * 8 + x] = block_dc;
484
5.50M
        }
485
2.75M
      }
486
1.37M
      float block00 = coefficients[0];
487
1.37M
      float block01 = coefficients[1];
488
1.37M
      float block10 = coefficients[8];
489
1.37M
      float block11 = coefficients[9];
490
1.37M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.37M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.37M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.37M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.37M
      break;
495
0
    }
496
1.28M
    case Type::DCT8X4: {
497
3.86M
      for (size_t x = 0; x < 2; x++) {
498
2.57M
        HWY_ALIGN float block[4 * 8];
499
2.57M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
2.57M
                                 scratch_space);
501
12.8M
        for (size_t iy = 0; iy < 4; iy++) {
502
92.8M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
82.5M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
82.5M
          }
506
10.3M
        }
507
2.57M
      }
508
1.28M
      float block0 = coefficients[0];
509
1.28M
      float block1 = coefficients[8];
510
1.28M
      coefficients[0] = (block0 + block1) * 0.5f;
511
1.28M
      coefficients[8] = (block0 - block1) * 0.5f;
512
1.28M
      break;
513
0
    }
514
1.23M
    case Type::DCT4X8: {
515
3.71M
      for (size_t y = 0; y < 2; y++) {
516
2.47M
        HWY_ALIGN float block[4 * 8];
517
2.47M
        ComputeScaledDCT<4, 8>()(
518
2.47M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
2.47M
            scratch_space);
520
12.3M
        for (size_t iy = 0; iy < 4; iy++) {
521
89.0M
          for (size_t ix = 0; ix < 8; ix++) {
522
79.1M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
79.1M
          }
524
9.89M
        }
525
2.47M
      }
526
1.23M
      float block0 = coefficients[0];
527
1.23M
      float block1 = coefficients[8];
528
1.23M
      coefficients[0] = (block0 + block1) * 0.5f;
529
1.23M
      coefficients[8] = (block0 - block1) * 0.5f;
530
1.23M
      break;
531
0
    }
532
1.19M
    case Type::DCT4X4: {
533
3.58M
      for (size_t y = 0; y < 2; y++) {
534
7.16M
        for (size_t x = 0; x < 2; x++) {
535
4.77M
          HWY_ALIGN float block[4 * 4];
536
4.77M
          ComputeScaledDCT<4, 4>()(
537
4.77M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.77M
              block, scratch_space);
539
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
540
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
541
76.4M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
76.4M
            }
543
19.1M
          }
544
4.77M
        }
545
2.38M
      }
546
1.19M
      float block00 = coefficients[0];
547
1.19M
      float block01 = coefficients[1];
548
1.19M
      float block10 = coefficients[8];
549
1.19M
      float block11 = coefficients[9];
550
1.19M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.19M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.19M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.19M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.19M
      break;
555
0
    }
556
1.60M
    case Type::DCT2X2: {
557
1.60M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.60M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.60M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.60M
      break;
561
0
    }
562
564k
    case Type::DCT16X16: {
563
564k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
564k
                                 scratch_space);
565
564k
      break;
566
0
    }
567
1.08M
    case Type::DCT16X8: {
568
1.08M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
1.08M
                                scratch_space);
570
1.08M
      break;
571
0
    }
572
1.09M
    case Type::DCT8X16: {
573
1.09M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
1.09M
                                scratch_space);
575
1.09M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
214k
    case Type::DCT32X16: {
588
214k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
214k
                                 scratch_space);
590
214k
      break;
591
0
    }
592
220k
    case Type::DCT16X32: {
593
220k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
220k
                                 scratch_space);
595
220k
      break;
596
0
    }
597
130k
    case Type::DCT32X32: {
598
130k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
130k
                                 scratch_space);
600
130k
      break;
601
0
    }
602
2.76M
    case Type::DCT: {
603
2.76M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
2.76M
                               scratch_space);
605
2.76M
      break;
606
0
    }
607
1.23M
    case Type::AFV0: {
608
1.23M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
1.23M
      break;
610
0
    }
611
1.22M
    case Type::AFV1: {
612
1.22M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
1.22M
      break;
614
0
    }
615
1.22M
    case Type::AFV2: {
616
1.22M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
1.22M
      break;
618
0
    }
619
1.24M
    case Type::AFV3: {
620
1.24M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
1.24M
      break;
622
0
    }
623
19.4k
    case Type::DCT64X64: {
624
19.4k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
19.4k
                                 scratch_space);
626
19.4k
      break;
627
0
    }
628
66.1k
    case Type::DCT64X32: {
629
66.1k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
66.1k
                                 scratch_space);
631
66.1k
      break;
632
0
    }
633
48.2k
    case Type::DCT32X64: {
634
48.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
48.2k
                                 scratch_space);
636
48.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
17.8M
  }
669
17.8M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
725k
                                          float* JXL_RESTRICT scratch_space) {
462
725k
  using Type = AcStrategyType;
463
725k
  switch (strategy) {
464
90.8k
    case Type::IDENTITY: {
465
272k
      for (size_t y = 0; y < 2; y++) {
466
544k
        for (size_t x = 0; x < 2; x++) {
467
363k
          float block_dc = 0;
468
1.81M
          for (size_t iy = 0; iy < 4; iy++) {
469
7.26M
            for (size_t ix = 0; ix < 4; ix++) {
470
5.81M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
5.81M
            }
472
1.45M
          }
473
363k
          block_dc *= 1.0f / 16;
474
1.81M
          for (size_t iy = 0; iy < 4; iy++) {
475
7.26M
            for (size_t ix = 0; ix < 4; ix++) {
476
5.81M
              if (ix == 1 && iy == 1) continue;
477
5.44M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
5.44M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
5.44M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
5.44M
            }
481
1.45M
          }
482
363k
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
363k
          coefficients[y * 8 + x] = block_dc;
484
363k
        }
485
181k
      }
486
90.8k
      float block00 = coefficients[0];
487
90.8k
      float block01 = coefficients[1];
488
90.8k
      float block10 = coefficients[8];
489
90.8k
      float block11 = coefficients[9];
490
90.8k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
90.8k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
90.8k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
90.8k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
90.8k
      break;
495
0
    }
496
47.6k
    case Type::DCT8X4: {
497
143k
      for (size_t x = 0; x < 2; x++) {
498
95.3k
        HWY_ALIGN float block[4 * 8];
499
95.3k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
95.3k
                                 scratch_space);
501
476k
        for (size_t iy = 0; iy < 4; iy++) {
502
3.43M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
3.05M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
3.05M
          }
506
381k
        }
507
95.3k
      }
508
47.6k
      float block0 = coefficients[0];
509
47.6k
      float block1 = coefficients[8];
510
47.6k
      coefficients[0] = (block0 + block1) * 0.5f;
511
47.6k
      coefficients[8] = (block0 - block1) * 0.5f;
512
47.6k
      break;
513
0
    }
514
21.4k
    case Type::DCT4X8: {
515
64.4k
      for (size_t y = 0; y < 2; y++) {
516
42.9k
        HWY_ALIGN float block[4 * 8];
517
42.9k
        ComputeScaledDCT<4, 8>()(
518
42.9k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
42.9k
            scratch_space);
520
214k
        for (size_t iy = 0; iy < 4; iy++) {
521
1.54M
          for (size_t ix = 0; ix < 8; ix++) {
522
1.37M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.37M
          }
524
171k
        }
525
42.9k
      }
526
21.4k
      float block0 = coefficients[0];
527
21.4k
      float block1 = coefficients[8];
528
21.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
21.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
21.4k
      break;
531
0
    }
532
267
    case Type::DCT4X4: {
533
801
      for (size_t y = 0; y < 2; y++) {
534
1.60k
        for (size_t x = 0; x < 2; x++) {
535
1.06k
          HWY_ALIGN float block[4 * 4];
536
1.06k
          ComputeScaledDCT<4, 4>()(
537
1.06k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.06k
              block, scratch_space);
539
5.34k
          for (size_t iy = 0; iy < 4; iy++) {
540
21.3k
            for (size_t ix = 0; ix < 4; ix++) {
541
17.0k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
17.0k
            }
543
4.27k
          }
544
1.06k
        }
545
534
      }
546
267
      float block00 = coefficients[0];
547
267
      float block01 = coefficients[1];
548
267
      float block10 = coefficients[8];
549
267
      float block11 = coefficients[9];
550
267
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
267
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
267
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
267
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
267
      break;
555
0
    }
556
206k
    case Type::DCT2X2: {
557
206k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
206k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
206k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
206k
      break;
561
0
    }
562
19.1k
    case Type::DCT16X16: {
563
19.1k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
19.1k
                                 scratch_space);
565
19.1k
      break;
566
0
    }
567
24.7k
    case Type::DCT16X8: {
568
24.7k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
24.7k
                                scratch_space);
570
24.7k
      break;
571
0
    }
572
27.7k
    case Type::DCT8X16: {
573
27.7k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
27.7k
                                scratch_space);
575
27.7k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
5.57k
    case Type::DCT32X16: {
588
5.57k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
5.57k
                                 scratch_space);
590
5.57k
      break;
591
0
    }
592
6.93k
    case Type::DCT16X32: {
593
6.93k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
6.93k
                                 scratch_space);
595
6.93k
      break;
596
0
    }
597
12.3k
    case Type::DCT32X32: {
598
12.3k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
12.3k
                                 scratch_space);
600
12.3k
      break;
601
0
    }
602
187k
    case Type::DCT: {
603
187k
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
187k
                               scratch_space);
605
187k
      break;
606
0
    }
607
20.0k
    case Type::AFV0: {
608
20.0k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
20.0k
      break;
610
0
    }
611
14.4k
    case Type::AFV1: {
612
14.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.4k
      break;
614
0
    }
615
15.6k
    case Type::AFV2: {
616
15.6k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
15.6k
      break;
618
0
    }
619
23.0k
    case Type::AFV3: {
620
23.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
23.0k
      break;
622
0
    }
623
1.23k
    case Type::DCT64X64: {
624
1.23k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
1.23k
                                 scratch_space);
626
1.23k
      break;
627
0
    }
628
162
    case Type::DCT64X32: {
629
162
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
162
                                 scratch_space);
631
162
      break;
632
0
    }
633
123
    case Type::DCT32X64: {
634
123
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
123
                                 scratch_space);
636
123
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
725k
  }
669
725k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
1.91M
                                          float* JXL_RESTRICT scratch_space) {
462
1.91M
  using Type = AcStrategyType;
463
1.91M
  switch (strategy) {
464
90.8k
    case Type::IDENTITY: {
465
272k
      for (size_t y = 0; y < 2; y++) {
466
544k
        for (size_t x = 0; x < 2; x++) {
467
363k
          float block_dc = 0;
468
1.81M
          for (size_t iy = 0; iy < 4; iy++) {
469
7.26M
            for (size_t ix = 0; ix < 4; ix++) {
470
5.81M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
5.81M
            }
472
1.45M
          }
473
363k
          block_dc *= 1.0f / 16;
474
1.81M
          for (size_t iy = 0; iy < 4; iy++) {
475
7.26M
            for (size_t ix = 0; ix < 4; ix++) {
476
5.81M
              if (ix == 1 && iy == 1) continue;
477
5.44M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
5.44M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
5.44M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
5.44M
            }
481
1.45M
          }
482
363k
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
363k
          coefficients[y * 8 + x] = block_dc;
484
363k
        }
485
181k
      }
486
90.8k
      float block00 = coefficients[0];
487
90.8k
      float block01 = coefficients[1];
488
90.8k
      float block10 = coefficients[8];
489
90.8k
      float block11 = coefficients[9];
490
90.8k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
90.8k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
90.8k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
90.8k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
90.8k
      break;
495
0
    }
496
47.6k
    case Type::DCT8X4: {
497
143k
      for (size_t x = 0; x < 2; x++) {
498
95.3k
        HWY_ALIGN float block[4 * 8];
499
95.3k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
95.3k
                                 scratch_space);
501
476k
        for (size_t iy = 0; iy < 4; iy++) {
502
3.43M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
3.05M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
3.05M
          }
506
381k
        }
507
95.3k
      }
508
47.6k
      float block0 = coefficients[0];
509
47.6k
      float block1 = coefficients[8];
510
47.6k
      coefficients[0] = (block0 + block1) * 0.5f;
511
47.6k
      coefficients[8] = (block0 - block1) * 0.5f;
512
47.6k
      break;
513
0
    }
514
21.4k
    case Type::DCT4X8: {
515
64.4k
      for (size_t y = 0; y < 2; y++) {
516
42.9k
        HWY_ALIGN float block[4 * 8];
517
42.9k
        ComputeScaledDCT<4, 8>()(
518
42.9k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
42.9k
            scratch_space);
520
214k
        for (size_t iy = 0; iy < 4; iy++) {
521
1.54M
          for (size_t ix = 0; ix < 8; ix++) {
522
1.37M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.37M
          }
524
171k
        }
525
42.9k
      }
526
21.4k
      float block0 = coefficients[0];
527
21.4k
      float block1 = coefficients[8];
528
21.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
21.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
21.4k
      break;
531
0
    }
532
267
    case Type::DCT4X4: {
533
801
      for (size_t y = 0; y < 2; y++) {
534
1.60k
        for (size_t x = 0; x < 2; x++) {
535
1.06k
          HWY_ALIGN float block[4 * 4];
536
1.06k
          ComputeScaledDCT<4, 4>()(
537
1.06k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.06k
              block, scratch_space);
539
5.34k
          for (size_t iy = 0; iy < 4; iy++) {
540
21.3k
            for (size_t ix = 0; ix < 4; ix++) {
541
17.0k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
17.0k
            }
543
4.27k
          }
544
1.06k
        }
545
534
      }
546
267
      float block00 = coefficients[0];
547
267
      float block01 = coefficients[1];
548
267
      float block10 = coefficients[8];
549
267
      float block11 = coefficients[9];
550
267
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
267
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
267
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
267
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
267
      break;
555
0
    }
556
206k
    case Type::DCT2X2: {
557
206k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
206k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
206k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
206k
      break;
561
0
    }
562
19.1k
    case Type::DCT16X16: {
563
19.1k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
19.1k
                                 scratch_space);
565
19.1k
      break;
566
0
    }
567
24.7k
    case Type::DCT16X8: {
568
24.7k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
24.7k
                                scratch_space);
570
24.7k
      break;
571
0
    }
572
27.7k
    case Type::DCT8X16: {
573
27.7k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
27.7k
                                scratch_space);
575
27.7k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
5.57k
    case Type::DCT32X16: {
588
5.57k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
5.57k
                                 scratch_space);
590
5.57k
      break;
591
0
    }
592
6.93k
    case Type::DCT16X32: {
593
6.93k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
6.93k
                                 scratch_space);
595
6.93k
      break;
596
0
    }
597
12.3k
    case Type::DCT32X32: {
598
12.3k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
12.3k
                                 scratch_space);
600
12.3k
      break;
601
0
    }
602
1.38M
    case Type::DCT: {
603
1.38M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.38M
                               scratch_space);
605
1.38M
      break;
606
0
    }
607
20.0k
    case Type::AFV0: {
608
20.0k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
20.0k
      break;
610
0
    }
611
14.4k
    case Type::AFV1: {
612
14.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.4k
      break;
614
0
    }
615
15.6k
    case Type::AFV2: {
616
15.6k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
15.6k
      break;
618
0
    }
619
23.0k
    case Type::AFV3: {
620
23.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
23.0k
      break;
622
0
    }
623
1.23k
    case Type::DCT64X64: {
624
1.23k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
1.23k
                                 scratch_space);
626
1.23k
      break;
627
0
    }
628
162
    case Type::DCT64X32: {
629
162
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
162
                                 scratch_space);
631
162
      break;
632
0
    }
633
123
    case Type::DCT32X64: {
634
123
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
123
                                 scratch_space);
636
123
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
1.91M
  }
669
1.91M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
15.1M
                                          float* JXL_RESTRICT scratch_space) {
462
15.1M
  using Type = AcStrategyType;
463
15.1M
  switch (strategy) {
464
1.19M
    case Type::IDENTITY: {
465
3.58M
      for (size_t y = 0; y < 2; y++) {
466
7.16M
        for (size_t x = 0; x < 2; x++) {
467
4.77M
          float block_dc = 0;
468
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
469
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
470
76.4M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
76.4M
            }
472
19.1M
          }
473
4.77M
          block_dc *= 1.0f / 16;
474
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
475
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
476
76.4M
              if (ix == 1 && iy == 1) continue;
477
71.6M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
71.6M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
71.6M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
71.6M
            }
481
19.1M
          }
482
4.77M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.77M
          coefficients[y * 8 + x] = block_dc;
484
4.77M
        }
485
2.38M
      }
486
1.19M
      float block00 = coefficients[0];
487
1.19M
      float block01 = coefficients[1];
488
1.19M
      float block10 = coefficients[8];
489
1.19M
      float block11 = coefficients[9];
490
1.19M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.19M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.19M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.19M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.19M
      break;
495
0
    }
496
1.19M
    case Type::DCT8X4: {
497
3.58M
      for (size_t x = 0; x < 2; x++) {
498
2.38M
        HWY_ALIGN float block[4 * 8];
499
2.38M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
2.38M
                                 scratch_space);
501
11.9M
        for (size_t iy = 0; iy < 4; iy++) {
502
85.9M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
76.4M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
76.4M
          }
506
9.55M
        }
507
2.38M
      }
508
1.19M
      float block0 = coefficients[0];
509
1.19M
      float block1 = coefficients[8];
510
1.19M
      coefficients[0] = (block0 + block1) * 0.5f;
511
1.19M
      coefficients[8] = (block0 - block1) * 0.5f;
512
1.19M
      break;
513
0
    }
514
1.19M
    case Type::DCT4X8: {
515
3.58M
      for (size_t y = 0; y < 2; y++) {
516
2.38M
        HWY_ALIGN float block[4 * 8];
517
2.38M
        ComputeScaledDCT<4, 8>()(
518
2.38M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
2.38M
            scratch_space);
520
11.9M
        for (size_t iy = 0; iy < 4; iy++) {
521
85.9M
          for (size_t ix = 0; ix < 8; ix++) {
522
76.4M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
76.4M
          }
524
9.55M
        }
525
2.38M
      }
526
1.19M
      float block0 = coefficients[0];
527
1.19M
      float block1 = coefficients[8];
528
1.19M
      coefficients[0] = (block0 + block1) * 0.5f;
529
1.19M
      coefficients[8] = (block0 - block1) * 0.5f;
530
1.19M
      break;
531
0
    }
532
1.19M
    case Type::DCT4X4: {
533
3.58M
      for (size_t y = 0; y < 2; y++) {
534
7.16M
        for (size_t x = 0; x < 2; x++) {
535
4.77M
          HWY_ALIGN float block[4 * 4];
536
4.77M
          ComputeScaledDCT<4, 4>()(
537
4.77M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.77M
              block, scratch_space);
539
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
540
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
541
76.4M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
76.4M
            }
543
19.1M
          }
544
4.77M
        }
545
2.38M
      }
546
1.19M
      float block00 = coefficients[0];
547
1.19M
      float block01 = coefficients[1];
548
1.19M
      float block10 = coefficients[8];
549
1.19M
      float block11 = coefficients[9];
550
1.19M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.19M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.19M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.19M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.19M
      break;
555
0
    }
556
1.19M
    case Type::DCT2X2: {
557
1.19M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.19M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.19M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.19M
      break;
561
0
    }
562
525k
    case Type::DCT16X16: {
563
525k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
525k
                                 scratch_space);
565
525k
      break;
566
0
    }
567
1.03M
    case Type::DCT16X8: {
568
1.03M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
1.03M
                                scratch_space);
570
1.03M
      break;
571
0
    }
572
1.03M
    case Type::DCT8X16: {
573
1.03M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
1.03M
                                scratch_space);
575
1.03M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
203k
    case Type::DCT32X16: {
588
203k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
203k
                                 scratch_space);
590
203k
      break;
591
0
    }
592
206k
    case Type::DCT16X32: {
593
206k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
206k
                                 scratch_space);
595
206k
      break;
596
0
    }
597
106k
    case Type::DCT32X32: {
598
106k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
106k
                                 scratch_space);
600
106k
      break;
601
0
    }
602
1.19M
    case Type::DCT: {
603
1.19M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.19M
                               scratch_space);
605
1.19M
      break;
606
0
    }
607
1.19M
    case Type::AFV0: {
608
1.19M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
1.19M
      break;
610
0
    }
611
1.19M
    case Type::AFV1: {
612
1.19M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
1.19M
      break;
614
0
    }
615
1.19M
    case Type::AFV2: {
616
1.19M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
1.19M
      break;
618
0
    }
619
1.19M
    case Type::AFV3: {
620
1.19M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
1.19M
      break;
622
0
    }
623
17.0k
    case Type::DCT64X64: {
624
17.0k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
17.0k
                                 scratch_space);
626
17.0k
      break;
627
0
    }
628
65.8k
    case Type::DCT64X32: {
629
65.8k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
65.8k
                                 scratch_space);
631
65.8k
      break;
632
0
    }
633
48.0k
    case Type::DCT32X64: {
634
48.0k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
48.0k
                                 scratch_space);
636
48.0k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
15.1M
  }
669
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
2.64M
                                              float* scratch_space) {
676
2.64M
  using Type = AcStrategyType;
677
2.64M
  switch (strategy) {
678
49.4k
    case Type::DCT16X8: {
679
49.4k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
49.4k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
49.4k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
49.4k
      break;
683
0
    }
684
55.4k
    case Type::DCT8X16: {
685
55.4k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
55.4k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
55.4k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
55.4k
      break;
689
0
    }
690
38.3k
    case Type::DCT16X16: {
691
38.3k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
38.3k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
38.3k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
38.3k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
11.1k
    case Type::DCT32X16: {
709
11.1k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
11.1k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
11.1k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
11.1k
      break;
713
0
    }
714
13.8k
    case Type::DCT16X32: {
715
13.8k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
13.8k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
13.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
13.8k
      break;
719
0
    }
720
24.6k
    case Type::DCT32X32: {
721
24.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
24.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
24.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
24.6k
      break;
725
0
    }
726
324
    case Type::DCT64X32: {
727
324
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
324
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
324
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
324
      break;
731
0
    }
732
246
    case Type::DCT32X64: {
733
246
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
246
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
246
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
246
      break;
737
0
    }
738
2.46k
    case Type::DCT64X64: {
739
2.46k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
2.46k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
2.46k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
2.46k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.56M
    case Type::DCT:
787
1.98M
    case Type::DCT2X2:
788
1.98M
    case Type::DCT4X4:
789
2.02M
    case Type::DCT4X8:
790
2.12M
    case Type::DCT8X4:
791
2.16M
    case Type::AFV0:
792
2.18M
    case Type::AFV1:
793
2.22M
    case Type::AFV2:
794
2.26M
    case Type::AFV3:
795
2.44M
    case Type::IDENTITY:
796
2.44M
      dc[0] = block[0];
797
2.44M
      break;
798
2.64M
  }
799
2.64M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
725k
                                              float* scratch_space) {
676
725k
  using Type = AcStrategyType;
677
725k
  switch (strategy) {
678
24.7k
    case Type::DCT16X8: {
679
24.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
24.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
24.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
24.7k
      break;
683
0
    }
684
27.7k
    case Type::DCT8X16: {
685
27.7k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
27.7k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
27.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
27.7k
      break;
689
0
    }
690
19.1k
    case Type::DCT16X16: {
691
19.1k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
19.1k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
19.1k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
19.1k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
5.57k
    case Type::DCT32X16: {
709
5.57k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
5.57k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
5.57k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
5.57k
      break;
713
0
    }
714
6.93k
    case Type::DCT16X32: {
715
6.93k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
6.93k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
6.93k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
6.93k
      break;
719
0
    }
720
12.3k
    case Type::DCT32X32: {
721
12.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
12.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
12.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
12.3k
      break;
725
0
    }
726
162
    case Type::DCT64X32: {
727
162
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
162
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
162
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
162
      break;
731
0
    }
732
123
    case Type::DCT32X64: {
733
123
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
123
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
123
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
123
      break;
737
0
    }
738
1.23k
    case Type::DCT64X64: {
739
1.23k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
1.23k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
1.23k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
1.23k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
187k
    case Type::DCT:
787
393k
    case Type::DCT2X2:
788
394k
    case Type::DCT4X4:
789
415k
    case Type::DCT4X8:
790
463k
    case Type::DCT8X4:
791
483k
    case Type::AFV0:
792
497k
    case Type::AFV1:
793
513k
    case Type::AFV2:
794
536k
    case Type::AFV3:
795
627k
    case Type::IDENTITY:
796
627k
      dc[0] = block[0];
797
627k
      break;
798
725k
  }
799
725k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
1.91M
                                              float* scratch_space) {
676
1.91M
  using Type = AcStrategyType;
677
1.91M
  switch (strategy) {
678
24.7k
    case Type::DCT16X8: {
679
24.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
24.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
24.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
24.7k
      break;
683
0
    }
684
27.7k
    case Type::DCT8X16: {
685
27.7k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
27.7k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
27.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
27.7k
      break;
689
0
    }
690
19.1k
    case Type::DCT16X16: {
691
19.1k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
19.1k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
19.1k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
19.1k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
5.57k
    case Type::DCT32X16: {
709
5.57k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
5.57k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
5.57k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
5.57k
      break;
713
0
    }
714
6.93k
    case Type::DCT16X32: {
715
6.93k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
6.93k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
6.93k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
6.93k
      break;
719
0
    }
720
12.3k
    case Type::DCT32X32: {
721
12.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
12.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
12.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
12.3k
      break;
725
0
    }
726
162
    case Type::DCT64X32: {
727
162
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
162
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
162
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
162
      break;
731
0
    }
732
123
    case Type::DCT32X64: {
733
123
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
123
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
123
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
123
      break;
737
0
    }
738
1.23k
    case Type::DCT64X64: {
739
1.23k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
1.23k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
1.23k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
1.23k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.38M
    case Type::DCT:
787
1.58M
    case Type::DCT2X2:
788
1.58M
    case Type::DCT4X4:
789
1.61M
    case Type::DCT4X8:
790
1.65M
    case Type::DCT8X4:
791
1.67M
    case Type::AFV0:
792
1.69M
    case Type::AFV1:
793
1.70M
    case Type::AFV2:
794
1.73M
    case Type::AFV3:
795
1.82M
    case Type::IDENTITY:
796
1.82M
      dc[0] = block[0];
797
1.82M
      break;
798
1.91M
  }
799
1.91M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_