Coverage Report

Created: 2026-05-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.88M
                                   const size_t output_stride, float* scratch) {
40
1.88M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.88M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.88M
  float* block = scratch;
43
1.88M
  if (ROWS < COLS) {
44
1.41M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.13M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.34M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.34M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.34M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.34M
      }
50
786k
    }
51
1.25M
  } else {
52
4.41M
    for (size_t y = 0; y < LF_COLS; y++) {
53
17.3M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
14.2M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
14.2M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
14.2M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
14.2M
      }
58
3.15M
    }
59
1.25M
  }
60
61
1.88M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.88M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.88M
                                  scratch_space);
64
1.88M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
234k
                                   const size_t output_stride, float* scratch) {
40
234k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
234k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
234k
  float* block = scratch;
43
234k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
234k
  } else {
52
469k
    for (size_t y = 0; y < LF_COLS; y++) {
53
704k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
469k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
469k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
469k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
469k
      }
58
234k
    }
59
234k
  }
60
61
234k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
234k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
234k
                                  scratch_space);
64
234k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
251k
                                   const size_t output_stride, float* scratch) {
40
251k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
251k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
251k
  float* block = scratch;
43
251k
  if (ROWS < COLS) {
44
503k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
754k
      for (size_t x = 0; x < LF_COLS; x++) {
46
503k
        block[y * COLS + x] = input[y * input_stride + x] *
47
503k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
503k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
503k
      }
50
251k
    }
51
251k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
251k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
251k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
251k
                                  scratch_space);
64
251k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
172k
                                   const size_t output_stride, float* scratch) {
40
172k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
172k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
172k
  float* block = scratch;
43
172k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
172k
  } else {
52
517k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.03M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
690k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
690k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
690k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
690k
      }
58
345k
    }
59
172k
  }
60
61
172k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
172k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
172k
                                  scratch_space);
64
172k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
55.6k
                                   const size_t output_stride, float* scratch) {
40
55.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
55.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
55.6k
  float* block = scratch;
43
55.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
55.6k
  } else {
52
166k
    for (size_t y = 0; y < LF_COLS; y++) {
53
556k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
444k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
444k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
444k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
444k
      }
58
111k
    }
59
55.6k
  }
60
61
55.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
55.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
55.6k
                                  scratch_space);
64
55.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
173k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
577k
      for (size_t x = 0; x < LF_COLS; x++) {
46
462k
        block[y * COLS + x] = input[y * input_stride + x] *
47
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
462k
      }
50
115k
    }
51
57.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
98.4k
                                   const size_t output_stride, float* scratch) {
40
98.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
98.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
98.4k
  float* block = scratch;
43
98.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
98.4k
  } else {
52
492k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.57M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.57M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.57M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.57M
      }
58
393k
    }
59
98.4k
  }
60
61
98.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
98.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
98.4k
                                  scratch_space);
64
98.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.0k
                                   const size_t output_stride, float* scratch) {
40
12.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.0k
  float* block = scratch;
43
12.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.0k
  } else {
52
60.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
434k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
386k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
386k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
386k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
386k
      }
58
48.2k
    }
59
12.0k
  }
60
61
12.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.0k
                                  scratch_space);
64
12.0k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.50k
                                   const size_t output_stride, float* scratch) {
40
6.50k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.50k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.50k
  float* block = scratch;
43
6.50k
  if (ROWS < COLS) {
44
32.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
234k
      for (size_t x = 0; x < LF_COLS; x++) {
46
208k
        block[y * COLS + x] = input[y * input_stride + x] *
47
208k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
208k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
208k
      }
50
26.0k
    }
51
6.50k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.50k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.50k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.50k
                                  scratch_space);
64
6.50k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
55.4k
                                   const size_t output_stride, float* scratch) {
40
55.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
55.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
55.4k
  float* block = scratch;
43
55.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
55.4k
  } else {
52
498k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.98M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.54M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.54M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.54M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.54M
      }
58
443k
    }
59
55.4k
  }
60
61
55.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
55.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
55.4k
                                  scratch_space);
64
55.4k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
234k
                                   const size_t output_stride, float* scratch) {
40
234k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
234k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
234k
  float* block = scratch;
43
234k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
234k
  } else {
52
469k
    for (size_t y = 0; y < LF_COLS; y++) {
53
704k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
469k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
469k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
469k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
469k
      }
58
234k
    }
59
234k
  }
60
61
234k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
234k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
234k
                                  scratch_space);
64
234k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
251k
                                   const size_t output_stride, float* scratch) {
40
251k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
251k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
251k
  float* block = scratch;
43
251k
  if (ROWS < COLS) {
44
503k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
754k
      for (size_t x = 0; x < LF_COLS; x++) {
46
503k
        block[y * COLS + x] = input[y * input_stride + x] *
47
503k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
503k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
503k
      }
50
251k
    }
51
251k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
251k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
251k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
251k
                                  scratch_space);
64
251k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
172k
                                   const size_t output_stride, float* scratch) {
40
172k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
172k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
172k
  float* block = scratch;
43
172k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
172k
  } else {
52
517k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.03M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
690k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
690k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
690k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
690k
      }
58
345k
    }
59
172k
  }
60
61
172k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
172k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
172k
                                  scratch_space);
64
172k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
55.6k
                                   const size_t output_stride, float* scratch) {
40
55.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
55.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
55.6k
  float* block = scratch;
43
55.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
55.6k
  } else {
52
166k
    for (size_t y = 0; y < LF_COLS; y++) {
53
556k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
444k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
444k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
444k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
444k
      }
58
111k
    }
59
55.6k
  }
60
61
55.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
55.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
55.6k
                                  scratch_space);
64
55.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
173k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
577k
      for (size_t x = 0; x < LF_COLS; x++) {
46
462k
        block[y * COLS + x] = input[y * input_stride + x] *
47
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
462k
      }
50
115k
    }
51
57.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
98.4k
                                   const size_t output_stride, float* scratch) {
40
98.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
98.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
98.4k
  float* block = scratch;
43
98.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
98.4k
  } else {
52
492k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.57M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.57M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.57M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.57M
      }
58
393k
    }
59
98.4k
  }
60
61
98.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
98.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
98.4k
                                  scratch_space);
64
98.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.0k
                                   const size_t output_stride, float* scratch) {
40
12.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.0k
  float* block = scratch;
43
12.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.0k
  } else {
52
60.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
434k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
386k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
386k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
386k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
386k
      }
58
48.2k
    }
59
12.0k
  }
60
61
12.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.0k
                                  scratch_space);
64
12.0k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.50k
                                   const size_t output_stride, float* scratch) {
40
6.50k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.50k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.50k
  float* block = scratch;
43
6.50k
  if (ROWS < COLS) {
44
32.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
234k
      for (size_t x = 0; x < LF_COLS; x++) {
46
208k
        block[y * COLS + x] = input[y * input_stride + x] *
47
208k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
208k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
208k
      }
50
26.0k
    }
51
6.50k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.50k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.50k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.50k
                                  scratch_space);
64
6.50k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
55.4k
                                   const size_t output_stride, float* scratch) {
40
55.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
55.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
55.4k
  float* block = scratch;
43
55.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
55.4k
  } else {
52
498k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.98M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.54M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.54M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.54M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.54M
      }
58
443k
    }
59
55.4k
  }
60
61
55.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
55.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
55.4k
                                  scratch_space);
64
55.4k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
52.1M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
52.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
52.1M
  static_assert(S % 2 == 0, "S should be even");
70
52.1M
  float temp[kDCTBlockSize];
71
52.1M
  constexpr size_t num_2x2 = S / 2;
72
173M
  for (size_t y = 0; y < num_2x2; y++) {
73
486M
    for (size_t x = 0; x < num_2x2; x++) {
74
365M
      float c00 = block[y * 2 * stride + x * 2];
75
365M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
365M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
365M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
365M
      float r00 = c00 + c01 + c10 + c11;
79
365M
      float r01 = c00 + c01 - c10 - c11;
80
365M
      float r10 = c00 - c01 + c10 - c11;
81
365M
      float r11 = c00 - c01 - c10 + c11;
82
365M
      r00 *= 0.25f;
83
365M
      r01 *= 0.25f;
84
365M
      r10 *= 0.25f;
85
365M
      r11 *= 0.25f;
86
365M
      temp[y * kBlockDim + x] = r00;
87
365M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
365M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
365M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
365M
    }
91
121M
  }
92
295M
  for (size_t y = 0; y < S; y++) {
93
1.70G
    for (size_t x = 0; x < S; x++) {
94
1.46G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.46G
    }
96
243M
  }
97
52.1M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
10.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
41.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
33.4M
      float c00 = block[y * 2 * stride + x * 2];
75
33.4M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
33.4M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
33.4M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
33.4M
      float r00 = c00 + c01 + c10 + c11;
79
33.4M
      float r01 = c00 + c01 - c10 - c11;
80
33.4M
      float r10 = c00 - c01 + c10 - c11;
81
33.4M
      float r11 = c00 - c01 - c10 + c11;
82
33.4M
      r00 *= 0.25f;
83
33.4M
      r01 *= 0.25f;
84
33.4M
      r10 *= 0.25f;
85
33.4M
      r11 *= 0.25f;
86
33.4M
      temp[y * kBlockDim + x] = r00;
87
33.4M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
33.4M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
33.4M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
33.4M
    }
91
8.36M
  }
92
18.8M
  for (size_t y = 0; y < S; y++) {
93
150M
    for (size_t x = 0; x < S; x++) {
94
133M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
133M
    }
96
16.7M
  }
97
2.09M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
6.27M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.36M
      float c00 = block[y * 2 * stride + x * 2];
75
8.36M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
8.36M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
8.36M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
8.36M
      float r00 = c00 + c01 + c10 + c11;
79
8.36M
      float r01 = c00 + c01 - c10 - c11;
80
8.36M
      float r10 = c00 - c01 + c10 - c11;
81
8.36M
      float r11 = c00 - c01 - c10 + c11;
82
8.36M
      r00 *= 0.25f;
83
8.36M
      r01 *= 0.25f;
84
8.36M
      r10 *= 0.25f;
85
8.36M
      r11 *= 0.25f;
86
8.36M
      temp[y * kBlockDim + x] = r00;
87
8.36M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
8.36M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
8.36M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
8.36M
    }
91
4.18M
  }
92
10.4M
  for (size_t y = 0; y < S; y++) {
93
41.8M
    for (size_t x = 0; x < S; x++) {
94
33.4M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
33.4M
    }
96
8.36M
  }
97
2.09M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
4.18M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.18M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.09M
      float c00 = block[y * 2 * stride + x * 2];
75
2.09M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.09M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.09M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.09M
      float r00 = c00 + c01 + c10 + c11;
79
2.09M
      float r01 = c00 + c01 - c10 - c11;
80
2.09M
      float r10 = c00 - c01 + c10 - c11;
81
2.09M
      float r11 = c00 - c01 - c10 + c11;
82
2.09M
      r00 *= 0.25f;
83
2.09M
      r01 *= 0.25f;
84
2.09M
      r10 *= 0.25f;
85
2.09M
      r11 *= 0.25f;
86
2.09M
      temp[y * kBlockDim + x] = r00;
87
2.09M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.09M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.09M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.09M
    }
91
2.09M
  }
92
6.27M
  for (size_t y = 0; y < S; y++) {
93
12.5M
    for (size_t x = 0; x < S; x++) {
94
8.36M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.36M
    }
96
4.18M
  }
97
2.09M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
10.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
41.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
33.4M
      float c00 = block[y * 2 * stride + x * 2];
75
33.4M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
33.4M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
33.4M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
33.4M
      float r00 = c00 + c01 + c10 + c11;
79
33.4M
      float r01 = c00 + c01 - c10 - c11;
80
33.4M
      float r10 = c00 - c01 + c10 - c11;
81
33.4M
      float r11 = c00 - c01 - c10 + c11;
82
33.4M
      r00 *= 0.25f;
83
33.4M
      r01 *= 0.25f;
84
33.4M
      r10 *= 0.25f;
85
33.4M
      r11 *= 0.25f;
86
33.4M
      temp[y * kBlockDim + x] = r00;
87
33.4M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
33.4M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
33.4M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
33.4M
    }
91
8.36M
  }
92
18.8M
  for (size_t y = 0; y < S; y++) {
93
150M
    for (size_t x = 0; x < S; x++) {
94
133M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
133M
    }
96
16.7M
  }
97
2.09M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
6.27M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.36M
      float c00 = block[y * 2 * stride + x * 2];
75
8.36M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
8.36M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
8.36M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
8.36M
      float r00 = c00 + c01 + c10 + c11;
79
8.36M
      float r01 = c00 + c01 - c10 - c11;
80
8.36M
      float r10 = c00 - c01 + c10 - c11;
81
8.36M
      float r11 = c00 - c01 - c10 + c11;
82
8.36M
      r00 *= 0.25f;
83
8.36M
      r01 *= 0.25f;
84
8.36M
      r10 *= 0.25f;
85
8.36M
      r11 *= 0.25f;
86
8.36M
      temp[y * kBlockDim + x] = r00;
87
8.36M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
8.36M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
8.36M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
8.36M
    }
91
4.18M
  }
92
10.4M
  for (size_t y = 0; y < S; y++) {
93
41.8M
    for (size_t x = 0; x < S; x++) {
94
33.4M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
33.4M
    }
96
8.36M
  }
97
2.09M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.09M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.09M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.09M
  static_assert(S % 2 == 0, "S should be even");
70
2.09M
  float temp[kDCTBlockSize];
71
2.09M
  constexpr size_t num_2x2 = S / 2;
72
4.18M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.18M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.09M
      float c00 = block[y * 2 * stride + x * 2];
75
2.09M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.09M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.09M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.09M
      float r00 = c00 + c01 + c10 + c11;
79
2.09M
      float r01 = c00 + c01 - c10 - c11;
80
2.09M
      float r10 = c00 - c01 + c10 - c11;
81
2.09M
      float r11 = c00 - c01 - c10 + c11;
82
2.09M
      r00 *= 0.25f;
83
2.09M
      r01 *= 0.25f;
84
2.09M
      r10 *= 0.25f;
85
2.09M
      r11 *= 0.25f;
86
2.09M
      temp[y * kBlockDim + x] = r00;
87
2.09M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.09M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.09M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.09M
    }
91
2.09M
  }
92
6.27M
  for (size_t y = 0; y < S; y++) {
93
12.5M
    for (size_t x = 0; x < S; x++) {
94
8.36M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.36M
    }
96
4.18M
  }
97
2.09M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
66.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
264M
    for (size_t x = 0; x < num_2x2; x++) {
74
211M
      float c00 = block[y * 2 * stride + x * 2];
75
211M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
211M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
211M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
211M
      float r00 = c00 + c01 + c10 + c11;
79
211M
      float r01 = c00 + c01 - c10 - c11;
80
211M
      float r10 = c00 - c01 + c10 - c11;
81
211M
      float r11 = c00 - c01 - c10 + c11;
82
211M
      r00 *= 0.25f;
83
211M
      r01 *= 0.25f;
84
211M
      r10 *= 0.25f;
85
211M
      r11 *= 0.25f;
86
211M
      temp[y * kBlockDim + x] = r00;
87
211M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
211M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
211M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
211M
    }
91
52.8M
  }
92
118M
  for (size_t y = 0; y < S; y++) {
93
950M
    for (size_t x = 0; x < S; x++) {
94
845M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
845M
    }
96
105M
  }
97
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
39.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
79.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
52.8M
      float c00 = block[y * 2 * stride + x * 2];
75
52.8M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
52.8M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
52.8M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
52.8M
      float r00 = c00 + c01 + c10 + c11;
79
52.8M
      float r01 = c00 + c01 - c10 - c11;
80
52.8M
      float r10 = c00 - c01 + c10 - c11;
81
52.8M
      float r11 = c00 - c01 - c10 + c11;
82
52.8M
      r00 *= 0.25f;
83
52.8M
      r01 *= 0.25f;
84
52.8M
      r10 *= 0.25f;
85
52.8M
      r11 *= 0.25f;
86
52.8M
      temp[y * kBlockDim + x] = r00;
87
52.8M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
52.8M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
52.8M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
52.8M
    }
91
26.4M
  }
92
66.0M
  for (size_t y = 0; y < S; y++) {
93
264M
    for (size_t x = 0; x < S; x++) {
94
211M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
211M
    }
96
52.8M
  }
97
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
26.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
26.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.2M
      float c00 = block[y * 2 * stride + x * 2];
75
13.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
13.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
13.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
13.2M
      float r00 = c00 + c01 + c10 + c11;
79
13.2M
      float r01 = c00 + c01 - c10 - c11;
80
13.2M
      float r10 = c00 - c01 + c10 - c11;
81
13.2M
      float r11 = c00 - c01 - c10 + c11;
82
13.2M
      r00 *= 0.25f;
83
13.2M
      r01 *= 0.25f;
84
13.2M
      r10 *= 0.25f;
85
13.2M
      r11 *= 0.25f;
86
13.2M
      temp[y * kBlockDim + x] = r00;
87
13.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
13.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
13.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
13.2M
    }
91
13.2M
  }
92
39.6M
  for (size_t y = 0; y < S; y++) {
93
79.2M
    for (size_t x = 0; x < S; x++) {
94
52.8M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
52.8M
    }
96
26.4M
  }
97
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
53.4M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
53.4M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
53.4M
      {
102
53.4M
          0.2500000000000000,
103
53.4M
          0.8769029297991420f,
104
53.4M
          0.0000000000000000,
105
53.4M
          0.0000000000000000,
106
53.4M
          0.0000000000000000,
107
53.4M
          -0.4105377591765233f,
108
53.4M
          0.0000000000000000,
109
53.4M
          0.0000000000000000,
110
53.4M
          0.0000000000000000,
111
53.4M
          0.0000000000000000,
112
53.4M
          0.0000000000000000,
113
53.4M
          0.0000000000000000,
114
53.4M
          0.0000000000000000,
115
53.4M
          0.0000000000000000,
116
53.4M
          0.0000000000000000,
117
53.4M
          0.0000000000000000,
118
53.4M
      },
119
53.4M
      {
120
53.4M
          0.2500000000000000,
121
53.4M
          0.2206518106944235f,
122
53.4M
          0.0000000000000000,
123
53.4M
          0.0000000000000000,
124
53.4M
          -0.7071067811865474f,
125
53.4M
          0.6235485373547691f,
126
53.4M
          0.0000000000000000,
127
53.4M
          0.0000000000000000,
128
53.4M
          0.0000000000000000,
129
53.4M
          0.0000000000000000,
130
53.4M
          0.0000000000000000,
131
53.4M
          0.0000000000000000,
132
53.4M
          0.0000000000000000,
133
53.4M
          0.0000000000000000,
134
53.4M
          0.0000000000000000,
135
53.4M
          0.0000000000000000,
136
53.4M
      },
137
53.4M
      {
138
53.4M
          0.2500000000000000,
139
53.4M
          -0.1014005039375376f,
140
53.4M
          0.4067007583026075f,
141
53.4M
          -0.2125574805828875f,
142
53.4M
          0.0000000000000000,
143
53.4M
          -0.0643507165794627f,
144
53.4M
          -0.4517556589999482f,
145
53.4M
          -0.3046847507248690f,
146
53.4M
          0.3017929516615495f,
147
53.4M
          0.4082482904638627f,
148
53.4M
          0.1747866975480809f,
149
53.4M
          -0.2110560104933578f,
150
53.4M
          -0.1426608480880726f,
151
53.4M
          -0.1381354035075859f,
152
53.4M
          -0.1743760259965107f,
153
53.4M
          0.1135498731499434f,
154
53.4M
      },
155
53.4M
      {
156
53.4M
          0.2500000000000000,
157
53.4M
          -0.1014005039375375f,
158
53.4M
          0.4444481661973445f,
159
53.4M
          0.3085497062849767f,
160
53.4M
          0.0000000000000000f,
161
53.4M
          -0.0643507165794627f,
162
53.4M
          0.1585450355184006f,
163
53.4M
          0.5112616136591823f,
164
53.4M
          0.2579236279634118f,
165
53.4M
          0.0000000000000000,
166
53.4M
          0.0812611176717539f,
167
53.4M
          0.1856718091610980f,
168
53.4M
          -0.3416446842253372f,
169
53.4M
          0.3302282550303788f,
170
53.4M
          0.0702790691196284f,
171
53.4M
          -0.0741750459581035f,
172
53.4M
      },
173
53.4M
      {
174
53.4M
          0.2500000000000000,
175
53.4M
          0.2206518106944236f,
176
53.4M
          0.0000000000000000,
177
53.4M
          0.0000000000000000,
178
53.4M
          0.7071067811865476f,
179
53.4M
          0.6235485373547694f,
180
53.4M
          0.0000000000000000,
181
53.4M
          0.0000000000000000,
182
53.4M
          0.0000000000000000,
183
53.4M
          0.0000000000000000,
184
53.4M
          0.0000000000000000,
185
53.4M
          0.0000000000000000,
186
53.4M
          0.0000000000000000,
187
53.4M
          0.0000000000000000,
188
53.4M
          0.0000000000000000,
189
53.4M
          0.0000000000000000,
190
53.4M
      },
191
53.4M
      {
192
53.4M
          0.2500000000000000,
193
53.4M
          -0.1014005039375378f,
194
53.4M
          0.0000000000000000,
195
53.4M
          0.4706702258572536f,
196
53.4M
          0.0000000000000000,
197
53.4M
          -0.0643507165794628f,
198
53.4M
          -0.0403851516082220f,
199
53.4M
          0.0000000000000000,
200
53.4M
          0.1627234014286620f,
201
53.4M
          0.0000000000000000,
202
53.4M
          0.0000000000000000,
203
53.4M
          0.0000000000000000,
204
53.4M
          0.7367497537172237f,
205
53.4M
          0.0875511500058708f,
206
53.4M
          -0.2921026642334881f,
207
53.4M
          0.1940289303259434f,
208
53.4M
      },
209
53.4M
      {
210
53.4M
          0.2500000000000000,
211
53.4M
          -0.1014005039375377f,
212
53.4M
          0.1957439937204294f,
213
53.4M
          -0.1621205195722993f,
214
53.4M
          0.0000000000000000,
215
53.4M
          -0.0643507165794628f,
216
53.4M
          0.0074182263792424f,
217
53.4M
          -0.2904801297289980f,
218
53.4M
          0.0952002265347504f,
219
53.4M
          0.0000000000000000,
220
53.4M
          -0.3675398009862027f,
221
53.4M
          0.4921585901373873f,
222
53.4M
          0.2462710772207515f,
223
53.4M
          -0.0794670660590957f,
224
53.4M
          0.3623817333531167f,
225
53.4M
          -0.4351904965232280f,
226
53.4M
      },
227
53.4M
      {
228
53.4M
          0.2500000000000000,
229
53.4M
          -0.1014005039375376f,
230
53.4M
          0.2929100136981264f,
231
53.4M
          0.0000000000000000,
232
53.4M
          0.0000000000000000,
233
53.4M
          -0.0643507165794627f,
234
53.4M
          0.3935103426921017f,
235
53.4M
          -0.0657870154914280f,
236
53.4M
          0.0000000000000000,
237
53.4M
          -0.4082482904638628f,
238
53.4M
          -0.3078822139579090f,
239
53.4M
          -0.3852501370925192f,
240
53.4M
          -0.0857401903551931f,
241
53.4M
          -0.4613374887461511f,
242
53.4M
          0.0000000000000000,
243
53.4M
          0.2191868483885747f,
244
53.4M
      },
245
53.4M
      {
246
53.4M
          0.2500000000000000,
247
53.4M
          -0.1014005039375376f,
248
53.4M
          -0.4067007583026072f,
249
53.4M
          -0.2125574805828705f,
250
53.4M
          0.0000000000000000,
251
53.4M
          -0.0643507165794627f,
252
53.4M
          -0.4517556589999464f,
253
53.4M
          0.3046847507248840f,
254
53.4M
          0.3017929516615503f,
255
53.4M
          -0.4082482904638635f,
256
53.4M
          -0.1747866975480813f,
257
53.4M
          0.2110560104933581f,
258
53.4M
          -0.1426608480880734f,
259
53.4M
          -0.1381354035075829f,
260
53.4M
          -0.1743760259965108f,
261
53.4M
          0.1135498731499426f,
262
53.4M
      },
263
53.4M
      {
264
53.4M
          0.2500000000000000,
265
53.4M
          -0.1014005039375377f,
266
53.4M
          -0.1957439937204287f,
267
53.4M
          -0.1621205195722833f,
268
53.4M
          0.0000000000000000,
269
53.4M
          -0.0643507165794628f,
270
53.4M
          0.0074182263792444f,
271
53.4M
          0.2904801297290076f,
272
53.4M
          0.0952002265347505f,
273
53.4M
          0.0000000000000000,
274
53.4M
          0.3675398009862011f,
275
53.4M
          -0.4921585901373891f,
276
53.4M
          0.2462710772207514f,
277
53.4M
          -0.0794670660591026f,
278
53.4M
          0.3623817333531165f,
279
53.4M
          -0.4351904965232251f,
280
53.4M
      },
281
53.4M
      {
282
53.4M
          0.2500000000000000,
283
53.4M
          -0.1014005039375375f,
284
53.4M
          0.0000000000000000,
285
53.4M
          -0.4706702258572528f,
286
53.4M
          0.0000000000000000,
287
53.4M
          -0.0643507165794627f,
288
53.4M
          0.1107416575309343f,
289
53.4M
          0.0000000000000000,
290
53.4M
          -0.1627234014286617f,
291
53.4M
          0.0000000000000000,
292
53.4M
          0.0000000000000000,
293
53.4M
          0.0000000000000000,
294
53.4M
          0.1488339922711357f,
295
53.4M
          0.4972464710953509f,
296
53.4M
          0.2921026642334879f,
297
53.4M
          0.5550443808910661f,
298
53.4M
      },
299
53.4M
      {
300
53.4M
          0.2500000000000000,
301
53.4M
          -0.1014005039375377f,
302
53.4M
          0.1137907446044809f,
303
53.4M
          -0.1464291867126764f,
304
53.4M
          0.0000000000000000,
305
53.4M
          -0.0643507165794628f,
306
53.4M
          0.0829816309488205f,
307
53.4M
          -0.2388977352334460f,
308
53.4M
          -0.3531238544981630f,
309
53.4M
          -0.4082482904638630f,
310
53.4M
          0.4826689115059883f,
311
53.4M
          0.1741941265991622f,
312
53.4M
          -0.0476868035022925f,
313
53.4M
          0.1253805944856366f,
314
53.4M
          -0.4326608024727445f,
315
53.4M
          -0.2546827712406646f,
316
53.4M
      },
317
53.4M
      {
318
53.4M
          0.2500000000000000,
319
53.4M
          -0.1014005039375377f,
320
53.4M
          -0.4444481661973438f,
321
53.4M
          0.3085497062849487f,
322
53.4M
          0.0000000000000000,
323
53.4M
          -0.0643507165794628f,
324
53.4M
          0.1585450355183970f,
325
53.4M
          -0.5112616136592012f,
326
53.4M
          0.2579236279634129f,
327
53.4M
          0.0000000000000000,
328
53.4M
          -0.0812611176717504f,
329
53.4M
          -0.1856718091610990f,
330
53.4M
          -0.3416446842253373f,
331
53.4M
          0.3302282550303805f,
332
53.4M
          0.0702790691196282f,
333
53.4M
          -0.0741750459581023f,
334
53.4M
      },
335
53.4M
      {
336
53.4M
          0.2500000000000000,
337
53.4M
          -0.1014005039375376f,
338
53.4M
          -0.2929100136981264f,
339
53.4M
          0.0000000000000000,
340
53.4M
          0.0000000000000000,
341
53.4M
          -0.0643507165794627f,
342
53.4M
          0.3935103426921022f,
343
53.4M
          0.0657870154914254f,
344
53.4M
          0.0000000000000000,
345
53.4M
          0.4082482904638634f,
346
53.4M
          0.3078822139579031f,
347
53.4M
          0.3852501370925211f,
348
53.4M
          -0.0857401903551927f,
349
53.4M
          -0.4613374887461554f,
350
53.4M
          0.0000000000000000,
351
53.4M
          0.2191868483885728f,
352
53.4M
      },
353
53.4M
      {
354
53.4M
          0.2500000000000000,
355
53.4M
          -0.1014005039375376f,
356
53.4M
          -0.1137907446044814f,
357
53.4M
          -0.1464291867126654f,
358
53.4M
          0.0000000000000000,
359
53.4M
          -0.0643507165794627f,
360
53.4M
          0.0829816309488214f,
361
53.4M
          0.2388977352334547f,
362
53.4M
          -0.3531238544981624f,
363
53.4M
          0.4082482904638630f,
364
53.4M
          -0.4826689115059858f,
365
53.4M
          -0.1741941265991621f,
366
53.4M
          -0.0476868035022928f,
367
53.4M
          0.1253805944856431f,
368
53.4M
          -0.4326608024727457f,
369
53.4M
          -0.2546827712406641f,
370
53.4M
      },
371
53.4M
      {
372
53.4M
          0.2500000000000000,
373
53.4M
          -0.1014005039375374f,
374
53.4M
          0.0000000000000000,
375
53.4M
          0.4251149611657548f,
376
53.4M
          0.0000000000000000,
377
53.4M
          -0.0643507165794626f,
378
53.4M
          -0.4517556589999480f,
379
53.4M
          0.0000000000000000,
380
53.4M
          -0.6035859033230976f,
381
53.4M
          0.0000000000000000,
382
53.4M
          0.0000000000000000,
383
53.4M
          0.0000000000000000,
384
53.4M
          -0.1426608480880724f,
385
53.4M
          -0.1381354035075845f,
386
53.4M
          0.3487520519930227f,
387
53.4M
          0.1135498731499429f,
388
53.4M
      },
389
53.4M
  };
390
391
53.4M
  const HWY_CAPPED(float, 16) d;
392
160M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
106M
    auto scalar = Zero(d);
394
1.81G
    for (size_t j = 0; j < 16; j++) {
395
1.71G
      auto px = Set(d, pixels[j]);
396
1.71G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.71G
      scalar = MulAdd(px, basis, scalar);
398
1.71G
    }
399
106M
    Store(scalar, d, coeffs + i);
400
106M
  }
401
53.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
310k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
310k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
310k
      {
102
310k
          0.2500000000000000,
103
310k
          0.8769029297991420f,
104
310k
          0.0000000000000000,
105
310k
          0.0000000000000000,
106
310k
          0.0000000000000000,
107
310k
          -0.4105377591765233f,
108
310k
          0.0000000000000000,
109
310k
          0.0000000000000000,
110
310k
          0.0000000000000000,
111
310k
          0.0000000000000000,
112
310k
          0.0000000000000000,
113
310k
          0.0000000000000000,
114
310k
          0.0000000000000000,
115
310k
          0.0000000000000000,
116
310k
          0.0000000000000000,
117
310k
          0.0000000000000000,
118
310k
      },
119
310k
      {
120
310k
          0.2500000000000000,
121
310k
          0.2206518106944235f,
122
310k
          0.0000000000000000,
123
310k
          0.0000000000000000,
124
310k
          -0.7071067811865474f,
125
310k
          0.6235485373547691f,
126
310k
          0.0000000000000000,
127
310k
          0.0000000000000000,
128
310k
          0.0000000000000000,
129
310k
          0.0000000000000000,
130
310k
          0.0000000000000000,
131
310k
          0.0000000000000000,
132
310k
          0.0000000000000000,
133
310k
          0.0000000000000000,
134
310k
          0.0000000000000000,
135
310k
          0.0000000000000000,
136
310k
      },
137
310k
      {
138
310k
          0.2500000000000000,
139
310k
          -0.1014005039375376f,
140
310k
          0.4067007583026075f,
141
310k
          -0.2125574805828875f,
142
310k
          0.0000000000000000,
143
310k
          -0.0643507165794627f,
144
310k
          -0.4517556589999482f,
145
310k
          -0.3046847507248690f,
146
310k
          0.3017929516615495f,
147
310k
          0.4082482904638627f,
148
310k
          0.1747866975480809f,
149
310k
          -0.2110560104933578f,
150
310k
          -0.1426608480880726f,
151
310k
          -0.1381354035075859f,
152
310k
          -0.1743760259965107f,
153
310k
          0.1135498731499434f,
154
310k
      },
155
310k
      {
156
310k
          0.2500000000000000,
157
310k
          -0.1014005039375375f,
158
310k
          0.4444481661973445f,
159
310k
          0.3085497062849767f,
160
310k
          0.0000000000000000f,
161
310k
          -0.0643507165794627f,
162
310k
          0.1585450355184006f,
163
310k
          0.5112616136591823f,
164
310k
          0.2579236279634118f,
165
310k
          0.0000000000000000,
166
310k
          0.0812611176717539f,
167
310k
          0.1856718091610980f,
168
310k
          -0.3416446842253372f,
169
310k
          0.3302282550303788f,
170
310k
          0.0702790691196284f,
171
310k
          -0.0741750459581035f,
172
310k
      },
173
310k
      {
174
310k
          0.2500000000000000,
175
310k
          0.2206518106944236f,
176
310k
          0.0000000000000000,
177
310k
          0.0000000000000000,
178
310k
          0.7071067811865476f,
179
310k
          0.6235485373547694f,
180
310k
          0.0000000000000000,
181
310k
          0.0000000000000000,
182
310k
          0.0000000000000000,
183
310k
          0.0000000000000000,
184
310k
          0.0000000000000000,
185
310k
          0.0000000000000000,
186
310k
          0.0000000000000000,
187
310k
          0.0000000000000000,
188
310k
          0.0000000000000000,
189
310k
          0.0000000000000000,
190
310k
      },
191
310k
      {
192
310k
          0.2500000000000000,
193
310k
          -0.1014005039375378f,
194
310k
          0.0000000000000000,
195
310k
          0.4706702258572536f,
196
310k
          0.0000000000000000,
197
310k
          -0.0643507165794628f,
198
310k
          -0.0403851516082220f,
199
310k
          0.0000000000000000,
200
310k
          0.1627234014286620f,
201
310k
          0.0000000000000000,
202
310k
          0.0000000000000000,
203
310k
          0.0000000000000000,
204
310k
          0.7367497537172237f,
205
310k
          0.0875511500058708f,
206
310k
          -0.2921026642334881f,
207
310k
          0.1940289303259434f,
208
310k
      },
209
310k
      {
210
310k
          0.2500000000000000,
211
310k
          -0.1014005039375377f,
212
310k
          0.1957439937204294f,
213
310k
          -0.1621205195722993f,
214
310k
          0.0000000000000000,
215
310k
          -0.0643507165794628f,
216
310k
          0.0074182263792424f,
217
310k
          -0.2904801297289980f,
218
310k
          0.0952002265347504f,
219
310k
          0.0000000000000000,
220
310k
          -0.3675398009862027f,
221
310k
          0.4921585901373873f,
222
310k
          0.2462710772207515f,
223
310k
          -0.0794670660590957f,
224
310k
          0.3623817333531167f,
225
310k
          -0.4351904965232280f,
226
310k
      },
227
310k
      {
228
310k
          0.2500000000000000,
229
310k
          -0.1014005039375376f,
230
310k
          0.2929100136981264f,
231
310k
          0.0000000000000000,
232
310k
          0.0000000000000000,
233
310k
          -0.0643507165794627f,
234
310k
          0.3935103426921017f,
235
310k
          -0.0657870154914280f,
236
310k
          0.0000000000000000,
237
310k
          -0.4082482904638628f,
238
310k
          -0.3078822139579090f,
239
310k
          -0.3852501370925192f,
240
310k
          -0.0857401903551931f,
241
310k
          -0.4613374887461511f,
242
310k
          0.0000000000000000,
243
310k
          0.2191868483885747f,
244
310k
      },
245
310k
      {
246
310k
          0.2500000000000000,
247
310k
          -0.1014005039375376f,
248
310k
          -0.4067007583026072f,
249
310k
          -0.2125574805828705f,
250
310k
          0.0000000000000000,
251
310k
          -0.0643507165794627f,
252
310k
          -0.4517556589999464f,
253
310k
          0.3046847507248840f,
254
310k
          0.3017929516615503f,
255
310k
          -0.4082482904638635f,
256
310k
          -0.1747866975480813f,
257
310k
          0.2110560104933581f,
258
310k
          -0.1426608480880734f,
259
310k
          -0.1381354035075829f,
260
310k
          -0.1743760259965108f,
261
310k
          0.1135498731499426f,
262
310k
      },
263
310k
      {
264
310k
          0.2500000000000000,
265
310k
          -0.1014005039375377f,
266
310k
          -0.1957439937204287f,
267
310k
          -0.1621205195722833f,
268
310k
          0.0000000000000000,
269
310k
          -0.0643507165794628f,
270
310k
          0.0074182263792444f,
271
310k
          0.2904801297290076f,
272
310k
          0.0952002265347505f,
273
310k
          0.0000000000000000,
274
310k
          0.3675398009862011f,
275
310k
          -0.4921585901373891f,
276
310k
          0.2462710772207514f,
277
310k
          -0.0794670660591026f,
278
310k
          0.3623817333531165f,
279
310k
          -0.4351904965232251f,
280
310k
      },
281
310k
      {
282
310k
          0.2500000000000000,
283
310k
          -0.1014005039375375f,
284
310k
          0.0000000000000000,
285
310k
          -0.4706702258572528f,
286
310k
          0.0000000000000000,
287
310k
          -0.0643507165794627f,
288
310k
          0.1107416575309343f,
289
310k
          0.0000000000000000,
290
310k
          -0.1627234014286617f,
291
310k
          0.0000000000000000,
292
310k
          0.0000000000000000,
293
310k
          0.0000000000000000,
294
310k
          0.1488339922711357f,
295
310k
          0.4972464710953509f,
296
310k
          0.2921026642334879f,
297
310k
          0.5550443808910661f,
298
310k
      },
299
310k
      {
300
310k
          0.2500000000000000,
301
310k
          -0.1014005039375377f,
302
310k
          0.1137907446044809f,
303
310k
          -0.1464291867126764f,
304
310k
          0.0000000000000000,
305
310k
          -0.0643507165794628f,
306
310k
          0.0829816309488205f,
307
310k
          -0.2388977352334460f,
308
310k
          -0.3531238544981630f,
309
310k
          -0.4082482904638630f,
310
310k
          0.4826689115059883f,
311
310k
          0.1741941265991622f,
312
310k
          -0.0476868035022925f,
313
310k
          0.1253805944856366f,
314
310k
          -0.4326608024727445f,
315
310k
          -0.2546827712406646f,
316
310k
      },
317
310k
      {
318
310k
          0.2500000000000000,
319
310k
          -0.1014005039375377f,
320
310k
          -0.4444481661973438f,
321
310k
          0.3085497062849487f,
322
310k
          0.0000000000000000,
323
310k
          -0.0643507165794628f,
324
310k
          0.1585450355183970f,
325
310k
          -0.5112616136592012f,
326
310k
          0.2579236279634129f,
327
310k
          0.0000000000000000,
328
310k
          -0.0812611176717504f,
329
310k
          -0.1856718091610990f,
330
310k
          -0.3416446842253373f,
331
310k
          0.3302282550303805f,
332
310k
          0.0702790691196282f,
333
310k
          -0.0741750459581023f,
334
310k
      },
335
310k
      {
336
310k
          0.2500000000000000,
337
310k
          -0.1014005039375376f,
338
310k
          -0.2929100136981264f,
339
310k
          0.0000000000000000,
340
310k
          0.0000000000000000,
341
310k
          -0.0643507165794627f,
342
310k
          0.3935103426921022f,
343
310k
          0.0657870154914254f,
344
310k
          0.0000000000000000,
345
310k
          0.4082482904638634f,
346
310k
          0.3078822139579031f,
347
310k
          0.3852501370925211f,
348
310k
          -0.0857401903551927f,
349
310k
          -0.4613374887461554f,
350
310k
          0.0000000000000000,
351
310k
          0.2191868483885728f,
352
310k
      },
353
310k
      {
354
310k
          0.2500000000000000,
355
310k
          -0.1014005039375376f,
356
310k
          -0.1137907446044814f,
357
310k
          -0.1464291867126654f,
358
310k
          0.0000000000000000,
359
310k
          -0.0643507165794627f,
360
310k
          0.0829816309488214f,
361
310k
          0.2388977352334547f,
362
310k
          -0.3531238544981624f,
363
310k
          0.4082482904638630f,
364
310k
          -0.4826689115059858f,
365
310k
          -0.1741941265991621f,
366
310k
          -0.0476868035022928f,
367
310k
          0.1253805944856431f,
368
310k
          -0.4326608024727457f,
369
310k
          -0.2546827712406641f,
370
310k
      },
371
310k
      {
372
310k
          0.2500000000000000,
373
310k
          -0.1014005039375374f,
374
310k
          0.0000000000000000,
375
310k
          0.4251149611657548f,
376
310k
          0.0000000000000000,
377
310k
          -0.0643507165794626f,
378
310k
          -0.4517556589999480f,
379
310k
          0.0000000000000000,
380
310k
          -0.6035859033230976f,
381
310k
          0.0000000000000000,
382
310k
          0.0000000000000000,
383
310k
          0.0000000000000000,
384
310k
          -0.1426608480880724f,
385
310k
          -0.1381354035075845f,
386
310k
          0.3487520519930227f,
387
310k
          0.1135498731499429f,
388
310k
      },
389
310k
  };
390
391
310k
  const HWY_CAPPED(float, 16) d;
392
931k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
621k
    auto scalar = Zero(d);
394
10.5M
    for (size_t j = 0; j < 16; j++) {
395
9.93M
      auto px = Set(d, pixels[j]);
396
9.93M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
9.93M
      scalar = MulAdd(px, basis, scalar);
398
9.93M
    }
399
621k
    Store(scalar, d, coeffs + i);
400
621k
  }
401
310k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
310k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
310k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
310k
      {
102
310k
          0.2500000000000000,
103
310k
          0.8769029297991420f,
104
310k
          0.0000000000000000,
105
310k
          0.0000000000000000,
106
310k
          0.0000000000000000,
107
310k
          -0.4105377591765233f,
108
310k
          0.0000000000000000,
109
310k
          0.0000000000000000,
110
310k
          0.0000000000000000,
111
310k
          0.0000000000000000,
112
310k
          0.0000000000000000,
113
310k
          0.0000000000000000,
114
310k
          0.0000000000000000,
115
310k
          0.0000000000000000,
116
310k
          0.0000000000000000,
117
310k
          0.0000000000000000,
118
310k
      },
119
310k
      {
120
310k
          0.2500000000000000,
121
310k
          0.2206518106944235f,
122
310k
          0.0000000000000000,
123
310k
          0.0000000000000000,
124
310k
          -0.7071067811865474f,
125
310k
          0.6235485373547691f,
126
310k
          0.0000000000000000,
127
310k
          0.0000000000000000,
128
310k
          0.0000000000000000,
129
310k
          0.0000000000000000,
130
310k
          0.0000000000000000,
131
310k
          0.0000000000000000,
132
310k
          0.0000000000000000,
133
310k
          0.0000000000000000,
134
310k
          0.0000000000000000,
135
310k
          0.0000000000000000,
136
310k
      },
137
310k
      {
138
310k
          0.2500000000000000,
139
310k
          -0.1014005039375376f,
140
310k
          0.4067007583026075f,
141
310k
          -0.2125574805828875f,
142
310k
          0.0000000000000000,
143
310k
          -0.0643507165794627f,
144
310k
          -0.4517556589999482f,
145
310k
          -0.3046847507248690f,
146
310k
          0.3017929516615495f,
147
310k
          0.4082482904638627f,
148
310k
          0.1747866975480809f,
149
310k
          -0.2110560104933578f,
150
310k
          -0.1426608480880726f,
151
310k
          -0.1381354035075859f,
152
310k
          -0.1743760259965107f,
153
310k
          0.1135498731499434f,
154
310k
      },
155
310k
      {
156
310k
          0.2500000000000000,
157
310k
          -0.1014005039375375f,
158
310k
          0.4444481661973445f,
159
310k
          0.3085497062849767f,
160
310k
          0.0000000000000000f,
161
310k
          -0.0643507165794627f,
162
310k
          0.1585450355184006f,
163
310k
          0.5112616136591823f,
164
310k
          0.2579236279634118f,
165
310k
          0.0000000000000000,
166
310k
          0.0812611176717539f,
167
310k
          0.1856718091610980f,
168
310k
          -0.3416446842253372f,
169
310k
          0.3302282550303788f,
170
310k
          0.0702790691196284f,
171
310k
          -0.0741750459581035f,
172
310k
      },
173
310k
      {
174
310k
          0.2500000000000000,
175
310k
          0.2206518106944236f,
176
310k
          0.0000000000000000,
177
310k
          0.0000000000000000,
178
310k
          0.7071067811865476f,
179
310k
          0.6235485373547694f,
180
310k
          0.0000000000000000,
181
310k
          0.0000000000000000,
182
310k
          0.0000000000000000,
183
310k
          0.0000000000000000,
184
310k
          0.0000000000000000,
185
310k
          0.0000000000000000,
186
310k
          0.0000000000000000,
187
310k
          0.0000000000000000,
188
310k
          0.0000000000000000,
189
310k
          0.0000000000000000,
190
310k
      },
191
310k
      {
192
310k
          0.2500000000000000,
193
310k
          -0.1014005039375378f,
194
310k
          0.0000000000000000,
195
310k
          0.4706702258572536f,
196
310k
          0.0000000000000000,
197
310k
          -0.0643507165794628f,
198
310k
          -0.0403851516082220f,
199
310k
          0.0000000000000000,
200
310k
          0.1627234014286620f,
201
310k
          0.0000000000000000,
202
310k
          0.0000000000000000,
203
310k
          0.0000000000000000,
204
310k
          0.7367497537172237f,
205
310k
          0.0875511500058708f,
206
310k
          -0.2921026642334881f,
207
310k
          0.1940289303259434f,
208
310k
      },
209
310k
      {
210
310k
          0.2500000000000000,
211
310k
          -0.1014005039375377f,
212
310k
          0.1957439937204294f,
213
310k
          -0.1621205195722993f,
214
310k
          0.0000000000000000,
215
310k
          -0.0643507165794628f,
216
310k
          0.0074182263792424f,
217
310k
          -0.2904801297289980f,
218
310k
          0.0952002265347504f,
219
310k
          0.0000000000000000,
220
310k
          -0.3675398009862027f,
221
310k
          0.4921585901373873f,
222
310k
          0.2462710772207515f,
223
310k
          -0.0794670660590957f,
224
310k
          0.3623817333531167f,
225
310k
          -0.4351904965232280f,
226
310k
      },
227
310k
      {
228
310k
          0.2500000000000000,
229
310k
          -0.1014005039375376f,
230
310k
          0.2929100136981264f,
231
310k
          0.0000000000000000,
232
310k
          0.0000000000000000,
233
310k
          -0.0643507165794627f,
234
310k
          0.3935103426921017f,
235
310k
          -0.0657870154914280f,
236
310k
          0.0000000000000000,
237
310k
          -0.4082482904638628f,
238
310k
          -0.3078822139579090f,
239
310k
          -0.3852501370925192f,
240
310k
          -0.0857401903551931f,
241
310k
          -0.4613374887461511f,
242
310k
          0.0000000000000000,
243
310k
          0.2191868483885747f,
244
310k
      },
245
310k
      {
246
310k
          0.2500000000000000,
247
310k
          -0.1014005039375376f,
248
310k
          -0.4067007583026072f,
249
310k
          -0.2125574805828705f,
250
310k
          0.0000000000000000,
251
310k
          -0.0643507165794627f,
252
310k
          -0.4517556589999464f,
253
310k
          0.3046847507248840f,
254
310k
          0.3017929516615503f,
255
310k
          -0.4082482904638635f,
256
310k
          -0.1747866975480813f,
257
310k
          0.2110560104933581f,
258
310k
          -0.1426608480880734f,
259
310k
          -0.1381354035075829f,
260
310k
          -0.1743760259965108f,
261
310k
          0.1135498731499426f,
262
310k
      },
263
310k
      {
264
310k
          0.2500000000000000,
265
310k
          -0.1014005039375377f,
266
310k
          -0.1957439937204287f,
267
310k
          -0.1621205195722833f,
268
310k
          0.0000000000000000,
269
310k
          -0.0643507165794628f,
270
310k
          0.0074182263792444f,
271
310k
          0.2904801297290076f,
272
310k
          0.0952002265347505f,
273
310k
          0.0000000000000000,
274
310k
          0.3675398009862011f,
275
310k
          -0.4921585901373891f,
276
310k
          0.2462710772207514f,
277
310k
          -0.0794670660591026f,
278
310k
          0.3623817333531165f,
279
310k
          -0.4351904965232251f,
280
310k
      },
281
310k
      {
282
310k
          0.2500000000000000,
283
310k
          -0.1014005039375375f,
284
310k
          0.0000000000000000,
285
310k
          -0.4706702258572528f,
286
310k
          0.0000000000000000,
287
310k
          -0.0643507165794627f,
288
310k
          0.1107416575309343f,
289
310k
          0.0000000000000000,
290
310k
          -0.1627234014286617f,
291
310k
          0.0000000000000000,
292
310k
          0.0000000000000000,
293
310k
          0.0000000000000000,
294
310k
          0.1488339922711357f,
295
310k
          0.4972464710953509f,
296
310k
          0.2921026642334879f,
297
310k
          0.5550443808910661f,
298
310k
      },
299
310k
      {
300
310k
          0.2500000000000000,
301
310k
          -0.1014005039375377f,
302
310k
          0.1137907446044809f,
303
310k
          -0.1464291867126764f,
304
310k
          0.0000000000000000,
305
310k
          -0.0643507165794628f,
306
310k
          0.0829816309488205f,
307
310k
          -0.2388977352334460f,
308
310k
          -0.3531238544981630f,
309
310k
          -0.4082482904638630f,
310
310k
          0.4826689115059883f,
311
310k
          0.1741941265991622f,
312
310k
          -0.0476868035022925f,
313
310k
          0.1253805944856366f,
314
310k
          -0.4326608024727445f,
315
310k
          -0.2546827712406646f,
316
310k
      },
317
310k
      {
318
310k
          0.2500000000000000,
319
310k
          -0.1014005039375377f,
320
310k
          -0.4444481661973438f,
321
310k
          0.3085497062849487f,
322
310k
          0.0000000000000000,
323
310k
          -0.0643507165794628f,
324
310k
          0.1585450355183970f,
325
310k
          -0.5112616136592012f,
326
310k
          0.2579236279634129f,
327
310k
          0.0000000000000000,
328
310k
          -0.0812611176717504f,
329
310k
          -0.1856718091610990f,
330
310k
          -0.3416446842253373f,
331
310k
          0.3302282550303805f,
332
310k
          0.0702790691196282f,
333
310k
          -0.0741750459581023f,
334
310k
      },
335
310k
      {
336
310k
          0.2500000000000000,
337
310k
          -0.1014005039375376f,
338
310k
          -0.2929100136981264f,
339
310k
          0.0000000000000000,
340
310k
          0.0000000000000000,
341
310k
          -0.0643507165794627f,
342
310k
          0.3935103426921022f,
343
310k
          0.0657870154914254f,
344
310k
          0.0000000000000000,
345
310k
          0.4082482904638634f,
346
310k
          0.3078822139579031f,
347
310k
          0.3852501370925211f,
348
310k
          -0.0857401903551927f,
349
310k
          -0.4613374887461554f,
350
310k
          0.0000000000000000,
351
310k
          0.2191868483885728f,
352
310k
      },
353
310k
      {
354
310k
          0.2500000000000000,
355
310k
          -0.1014005039375376f,
356
310k
          -0.1137907446044814f,
357
310k
          -0.1464291867126654f,
358
310k
          0.0000000000000000,
359
310k
          -0.0643507165794627f,
360
310k
          0.0829816309488214f,
361
310k
          0.2388977352334547f,
362
310k
          -0.3531238544981624f,
363
310k
          0.4082482904638630f,
364
310k
          -0.4826689115059858f,
365
310k
          -0.1741941265991621f,
366
310k
          -0.0476868035022928f,
367
310k
          0.1253805944856431f,
368
310k
          -0.4326608024727457f,
369
310k
          -0.2546827712406641f,
370
310k
      },
371
310k
      {
372
310k
          0.2500000000000000,
373
310k
          -0.1014005039375374f,
374
310k
          0.0000000000000000,
375
310k
          0.4251149611657548f,
376
310k
          0.0000000000000000,
377
310k
          -0.0643507165794626f,
378
310k
          -0.4517556589999480f,
379
310k
          0.0000000000000000,
380
310k
          -0.6035859033230976f,
381
310k
          0.0000000000000000,
382
310k
          0.0000000000000000,
383
310k
          0.0000000000000000,
384
310k
          -0.1426608480880724f,
385
310k
          -0.1381354035075845f,
386
310k
          0.3487520519930227f,
387
310k
          0.1135498731499429f,
388
310k
      },
389
310k
  };
390
391
310k
  const HWY_CAPPED(float, 16) d;
392
931k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
621k
    auto scalar = Zero(d);
394
10.5M
    for (size_t j = 0; j < 16; j++) {
395
9.93M
      auto px = Set(d, pixels[j]);
396
9.93M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
9.93M
      scalar = MulAdd(px, basis, scalar);
398
9.93M
    }
399
621k
    Store(scalar, d, coeffs + i);
400
621k
  }
401
310k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
52.8M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
52.8M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
52.8M
      {
102
52.8M
          0.2500000000000000,
103
52.8M
          0.8769029297991420f,
104
52.8M
          0.0000000000000000,
105
52.8M
          0.0000000000000000,
106
52.8M
          0.0000000000000000,
107
52.8M
          -0.4105377591765233f,
108
52.8M
          0.0000000000000000,
109
52.8M
          0.0000000000000000,
110
52.8M
          0.0000000000000000,
111
52.8M
          0.0000000000000000,
112
52.8M
          0.0000000000000000,
113
52.8M
          0.0000000000000000,
114
52.8M
          0.0000000000000000,
115
52.8M
          0.0000000000000000,
116
52.8M
          0.0000000000000000,
117
52.8M
          0.0000000000000000,
118
52.8M
      },
119
52.8M
      {
120
52.8M
          0.2500000000000000,
121
52.8M
          0.2206518106944235f,
122
52.8M
          0.0000000000000000,
123
52.8M
          0.0000000000000000,
124
52.8M
          -0.7071067811865474f,
125
52.8M
          0.6235485373547691f,
126
52.8M
          0.0000000000000000,
127
52.8M
          0.0000000000000000,
128
52.8M
          0.0000000000000000,
129
52.8M
          0.0000000000000000,
130
52.8M
          0.0000000000000000,
131
52.8M
          0.0000000000000000,
132
52.8M
          0.0000000000000000,
133
52.8M
          0.0000000000000000,
134
52.8M
          0.0000000000000000,
135
52.8M
          0.0000000000000000,
136
52.8M
      },
137
52.8M
      {
138
52.8M
          0.2500000000000000,
139
52.8M
          -0.1014005039375376f,
140
52.8M
          0.4067007583026075f,
141
52.8M
          -0.2125574805828875f,
142
52.8M
          0.0000000000000000,
143
52.8M
          -0.0643507165794627f,
144
52.8M
          -0.4517556589999482f,
145
52.8M
          -0.3046847507248690f,
146
52.8M
          0.3017929516615495f,
147
52.8M
          0.4082482904638627f,
148
52.8M
          0.1747866975480809f,
149
52.8M
          -0.2110560104933578f,
150
52.8M
          -0.1426608480880726f,
151
52.8M
          -0.1381354035075859f,
152
52.8M
          -0.1743760259965107f,
153
52.8M
          0.1135498731499434f,
154
52.8M
      },
155
52.8M
      {
156
52.8M
          0.2500000000000000,
157
52.8M
          -0.1014005039375375f,
158
52.8M
          0.4444481661973445f,
159
52.8M
          0.3085497062849767f,
160
52.8M
          0.0000000000000000f,
161
52.8M
          -0.0643507165794627f,
162
52.8M
          0.1585450355184006f,
163
52.8M
          0.5112616136591823f,
164
52.8M
          0.2579236279634118f,
165
52.8M
          0.0000000000000000,
166
52.8M
          0.0812611176717539f,
167
52.8M
          0.1856718091610980f,
168
52.8M
          -0.3416446842253372f,
169
52.8M
          0.3302282550303788f,
170
52.8M
          0.0702790691196284f,
171
52.8M
          -0.0741750459581035f,
172
52.8M
      },
173
52.8M
      {
174
52.8M
          0.2500000000000000,
175
52.8M
          0.2206518106944236f,
176
52.8M
          0.0000000000000000,
177
52.8M
          0.0000000000000000,
178
52.8M
          0.7071067811865476f,
179
52.8M
          0.6235485373547694f,
180
52.8M
          0.0000000000000000,
181
52.8M
          0.0000000000000000,
182
52.8M
          0.0000000000000000,
183
52.8M
          0.0000000000000000,
184
52.8M
          0.0000000000000000,
185
52.8M
          0.0000000000000000,
186
52.8M
          0.0000000000000000,
187
52.8M
          0.0000000000000000,
188
52.8M
          0.0000000000000000,
189
52.8M
          0.0000000000000000,
190
52.8M
      },
191
52.8M
      {
192
52.8M
          0.2500000000000000,
193
52.8M
          -0.1014005039375378f,
194
52.8M
          0.0000000000000000,
195
52.8M
          0.4706702258572536f,
196
52.8M
          0.0000000000000000,
197
52.8M
          -0.0643507165794628f,
198
52.8M
          -0.0403851516082220f,
199
52.8M
          0.0000000000000000,
200
52.8M
          0.1627234014286620f,
201
52.8M
          0.0000000000000000,
202
52.8M
          0.0000000000000000,
203
52.8M
          0.0000000000000000,
204
52.8M
          0.7367497537172237f,
205
52.8M
          0.0875511500058708f,
206
52.8M
          -0.2921026642334881f,
207
52.8M
          0.1940289303259434f,
208
52.8M
      },
209
52.8M
      {
210
52.8M
          0.2500000000000000,
211
52.8M
          -0.1014005039375377f,
212
52.8M
          0.1957439937204294f,
213
52.8M
          -0.1621205195722993f,
214
52.8M
          0.0000000000000000,
215
52.8M
          -0.0643507165794628f,
216
52.8M
          0.0074182263792424f,
217
52.8M
          -0.2904801297289980f,
218
52.8M
          0.0952002265347504f,
219
52.8M
          0.0000000000000000,
220
52.8M
          -0.3675398009862027f,
221
52.8M
          0.4921585901373873f,
222
52.8M
          0.2462710772207515f,
223
52.8M
          -0.0794670660590957f,
224
52.8M
          0.3623817333531167f,
225
52.8M
          -0.4351904965232280f,
226
52.8M
      },
227
52.8M
      {
228
52.8M
          0.2500000000000000,
229
52.8M
          -0.1014005039375376f,
230
52.8M
          0.2929100136981264f,
231
52.8M
          0.0000000000000000,
232
52.8M
          0.0000000000000000,
233
52.8M
          -0.0643507165794627f,
234
52.8M
          0.3935103426921017f,
235
52.8M
          -0.0657870154914280f,
236
52.8M
          0.0000000000000000,
237
52.8M
          -0.4082482904638628f,
238
52.8M
          -0.3078822139579090f,
239
52.8M
          -0.3852501370925192f,
240
52.8M
          -0.0857401903551931f,
241
52.8M
          -0.4613374887461511f,
242
52.8M
          0.0000000000000000,
243
52.8M
          0.2191868483885747f,
244
52.8M
      },
245
52.8M
      {
246
52.8M
          0.2500000000000000,
247
52.8M
          -0.1014005039375376f,
248
52.8M
          -0.4067007583026072f,
249
52.8M
          -0.2125574805828705f,
250
52.8M
          0.0000000000000000,
251
52.8M
          -0.0643507165794627f,
252
52.8M
          -0.4517556589999464f,
253
52.8M
          0.3046847507248840f,
254
52.8M
          0.3017929516615503f,
255
52.8M
          -0.4082482904638635f,
256
52.8M
          -0.1747866975480813f,
257
52.8M
          0.2110560104933581f,
258
52.8M
          -0.1426608480880734f,
259
52.8M
          -0.1381354035075829f,
260
52.8M
          -0.1743760259965108f,
261
52.8M
          0.1135498731499426f,
262
52.8M
      },
263
52.8M
      {
264
52.8M
          0.2500000000000000,
265
52.8M
          -0.1014005039375377f,
266
52.8M
          -0.1957439937204287f,
267
52.8M
          -0.1621205195722833f,
268
52.8M
          0.0000000000000000,
269
52.8M
          -0.0643507165794628f,
270
52.8M
          0.0074182263792444f,
271
52.8M
          0.2904801297290076f,
272
52.8M
          0.0952002265347505f,
273
52.8M
          0.0000000000000000,
274
52.8M
          0.3675398009862011f,
275
52.8M
          -0.4921585901373891f,
276
52.8M
          0.2462710772207514f,
277
52.8M
          -0.0794670660591026f,
278
52.8M
          0.3623817333531165f,
279
52.8M
          -0.4351904965232251f,
280
52.8M
      },
281
52.8M
      {
282
52.8M
          0.2500000000000000,
283
52.8M
          -0.1014005039375375f,
284
52.8M
          0.0000000000000000,
285
52.8M
          -0.4706702258572528f,
286
52.8M
          0.0000000000000000,
287
52.8M
          -0.0643507165794627f,
288
52.8M
          0.1107416575309343f,
289
52.8M
          0.0000000000000000,
290
52.8M
          -0.1627234014286617f,
291
52.8M
          0.0000000000000000,
292
52.8M
          0.0000000000000000,
293
52.8M
          0.0000000000000000,
294
52.8M
          0.1488339922711357f,
295
52.8M
          0.4972464710953509f,
296
52.8M
          0.2921026642334879f,
297
52.8M
          0.5550443808910661f,
298
52.8M
      },
299
52.8M
      {
300
52.8M
          0.2500000000000000,
301
52.8M
          -0.1014005039375377f,
302
52.8M
          0.1137907446044809f,
303
52.8M
          -0.1464291867126764f,
304
52.8M
          0.0000000000000000,
305
52.8M
          -0.0643507165794628f,
306
52.8M
          0.0829816309488205f,
307
52.8M
          -0.2388977352334460f,
308
52.8M
          -0.3531238544981630f,
309
52.8M
          -0.4082482904638630f,
310
52.8M
          0.4826689115059883f,
311
52.8M
          0.1741941265991622f,
312
52.8M
          -0.0476868035022925f,
313
52.8M
          0.1253805944856366f,
314
52.8M
          -0.4326608024727445f,
315
52.8M
          -0.2546827712406646f,
316
52.8M
      },
317
52.8M
      {
318
52.8M
          0.2500000000000000,
319
52.8M
          -0.1014005039375377f,
320
52.8M
          -0.4444481661973438f,
321
52.8M
          0.3085497062849487f,
322
52.8M
          0.0000000000000000,
323
52.8M
          -0.0643507165794628f,
324
52.8M
          0.1585450355183970f,
325
52.8M
          -0.5112616136592012f,
326
52.8M
          0.2579236279634129f,
327
52.8M
          0.0000000000000000,
328
52.8M
          -0.0812611176717504f,
329
52.8M
          -0.1856718091610990f,
330
52.8M
          -0.3416446842253373f,
331
52.8M
          0.3302282550303805f,
332
52.8M
          0.0702790691196282f,
333
52.8M
          -0.0741750459581023f,
334
52.8M
      },
335
52.8M
      {
336
52.8M
          0.2500000000000000,
337
52.8M
          -0.1014005039375376f,
338
52.8M
          -0.2929100136981264f,
339
52.8M
          0.0000000000000000,
340
52.8M
          0.0000000000000000,
341
52.8M
          -0.0643507165794627f,
342
52.8M
          0.3935103426921022f,
343
52.8M
          0.0657870154914254f,
344
52.8M
          0.0000000000000000,
345
52.8M
          0.4082482904638634f,
346
52.8M
          0.3078822139579031f,
347
52.8M
          0.3852501370925211f,
348
52.8M
          -0.0857401903551927f,
349
52.8M
          -0.4613374887461554f,
350
52.8M
          0.0000000000000000,
351
52.8M
          0.2191868483885728f,
352
52.8M
      },
353
52.8M
      {
354
52.8M
          0.2500000000000000,
355
52.8M
          -0.1014005039375376f,
356
52.8M
          -0.1137907446044814f,
357
52.8M
          -0.1464291867126654f,
358
52.8M
          0.0000000000000000,
359
52.8M
          -0.0643507165794627f,
360
52.8M
          0.0829816309488214f,
361
52.8M
          0.2388977352334547f,
362
52.8M
          -0.3531238544981624f,
363
52.8M
          0.4082482904638630f,
364
52.8M
          -0.4826689115059858f,
365
52.8M
          -0.1741941265991621f,
366
52.8M
          -0.0476868035022928f,
367
52.8M
          0.1253805944856431f,
368
52.8M
          -0.4326608024727457f,
369
52.8M
          -0.2546827712406641f,
370
52.8M
      },
371
52.8M
      {
372
52.8M
          0.2500000000000000,
373
52.8M
          -0.1014005039375374f,
374
52.8M
          0.0000000000000000,
375
52.8M
          0.4251149611657548f,
376
52.8M
          0.0000000000000000,
377
52.8M
          -0.0643507165794626f,
378
52.8M
          -0.4517556589999480f,
379
52.8M
          0.0000000000000000,
380
52.8M
          -0.6035859033230976f,
381
52.8M
          0.0000000000000000,
382
52.8M
          0.0000000000000000,
383
52.8M
          0.0000000000000000,
384
52.8M
          -0.1426608480880724f,
385
52.8M
          -0.1381354035075845f,
386
52.8M
          0.3487520519930227f,
387
52.8M
          0.1135498731499429f,
388
52.8M
      },
389
52.8M
  };
390
391
52.8M
  const HWY_CAPPED(float, 16) d;
392
158M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
105M
    auto scalar = Zero(d);
394
1.79G
    for (size_t j = 0; j < 16; j++) {
395
1.69G
      auto px = Set(d, pixels[j]);
396
1.69G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.69G
      scalar = MulAdd(px, basis, scalar);
398
1.69G
    }
399
105M
    Store(scalar, d, coeffs + i);
400
105M
  }
401
52.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
53.4M
                            float* JXL_RESTRICT coefficients) {
411
53.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
53.4M
  size_t afv_x = afv_kind & 1;
413
53.4M
  size_t afv_y = afv_kind / 2;
414
53.4M
  HWY_ALIGN float block[4 * 8] = {};
415
267M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.06G
    for (size_t ix = 0; ix < 4; ix++) {
417
855M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
855M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
855M
    }
420
213M
  }
421
  // AFV coefficients in (even, even) positions.
422
53.4M
  HWY_ALIGN float coeff[4 * 4];
423
53.4M
  AFVDCT4x4(block, coeff);
424
267M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.06G
    for (size_t ix = 0; ix < 4; ix++) {
426
855M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
855M
    }
428
213M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
53.4M
  ComputeScaledDCT<4, 4>()(
431
53.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
53.4M
              pixels_stride),
433
53.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
267M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.92G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.71G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.71G
    }
439
213M
  }
440
  // 4x8 DCT of the other half of the block.
441
53.4M
  ComputeScaledDCT<4, 8>()(
442
53.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
53.4M
      block, scratch_space);
444
267M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.92G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.71G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.71G
    }
448
213M
  }
449
53.4M
  float block00 = coefficients[0] * 0.25f;
450
53.4M
  float block01 = coefficients[1];
451
53.4M
  float block10 = coefficients[8];
452
53.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
53.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
53.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
53.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
101k
                            float* JXL_RESTRICT coefficients) {
411
101k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
101k
  size_t afv_x = afv_kind & 1;
413
101k
  size_t afv_y = afv_kind / 2;
414
101k
  HWY_ALIGN float block[4 * 8] = {};
415
509k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.03M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.62M
    }
420
407k
  }
421
  // AFV coefficients in (even, even) positions.
422
101k
  HWY_ALIGN float coeff[4 * 4];
423
101k
  AFVDCT4x4(block, coeff);
424
509k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.03M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.62M
    }
428
407k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
101k
  ComputeScaledDCT<4, 4>()(
431
101k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
101k
              pixels_stride),
433
101k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
509k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.66M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.25M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.25M
    }
439
407k
  }
440
  // 4x8 DCT of the other half of the block.
441
101k
  ComputeScaledDCT<4, 8>()(
442
101k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
101k
      block, scratch_space);
444
509k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.66M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.25M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.25M
    }
448
407k
  }
449
101k
  float block00 = coefficients[0] * 0.25f;
450
101k
  float block01 = coefficients[1];
451
101k
  float block10 = coefficients[8];
452
101k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
101k
  coefficients[1] = (block00 - block01) * 0.5f;
454
101k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
101k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
58.9k
                            float* JXL_RESTRICT coefficients) {
411
58.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
58.9k
  size_t afv_x = afv_kind & 1;
413
58.9k
  size_t afv_y = afv_kind / 2;
414
58.9k
  HWY_ALIGN float block[4 * 8] = {};
415
294k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.17M
    for (size_t ix = 0; ix < 4; ix++) {
417
942k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
942k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
942k
    }
420
235k
  }
421
  // AFV coefficients in (even, even) positions.
422
58.9k
  HWY_ALIGN float coeff[4 * 4];
423
58.9k
  AFVDCT4x4(block, coeff);
424
294k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.17M
    for (size_t ix = 0; ix < 4; ix++) {
426
942k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
942k
    }
428
235k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
58.9k
  ComputeScaledDCT<4, 4>()(
431
58.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
58.9k
              pixels_stride),
433
58.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
294k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.12M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.88M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.88M
    }
439
235k
  }
440
  // 4x8 DCT of the other half of the block.
441
58.9k
  ComputeScaledDCT<4, 8>()(
442
58.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
58.9k
      block, scratch_space);
444
294k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.12M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.88M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.88M
    }
448
235k
  }
449
58.9k
  float block00 = coefficients[0] * 0.25f;
450
58.9k
  float block01 = coefficients[1];
451
58.9k
  float block10 = coefficients[8];
452
58.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
58.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
58.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
58.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
72.1k
                            float* JXL_RESTRICT coefficients) {
411
72.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
72.1k
  size_t afv_x = afv_kind & 1;
413
72.1k
  size_t afv_y = afv_kind / 2;
414
72.1k
  HWY_ALIGN float block[4 * 8] = {};
415
360k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.44M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.15M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.15M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.15M
    }
420
288k
  }
421
  // AFV coefficients in (even, even) positions.
422
72.1k
  HWY_ALIGN float coeff[4 * 4];
423
72.1k
  AFVDCT4x4(block, coeff);
424
360k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.44M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.15M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.15M
    }
428
288k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
72.1k
  ComputeScaledDCT<4, 4>()(
431
72.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
72.1k
              pixels_stride),
433
72.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
360k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.59M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.30M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.30M
    }
439
288k
  }
440
  // 4x8 DCT of the other half of the block.
441
72.1k
  ComputeScaledDCT<4, 8>()(
442
72.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
72.1k
      block, scratch_space);
444
360k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.59M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.30M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.30M
    }
448
288k
  }
449
72.1k
  float block00 = coefficients[0] * 0.25f;
450
72.1k
  float block01 = coefficients[1];
451
72.1k
  float block10 = coefficients[8];
452
72.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
72.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
72.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
72.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
77.6k
                            float* JXL_RESTRICT coefficients) {
411
77.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
77.6k
  size_t afv_x = afv_kind & 1;
413
77.6k
  size_t afv_y = afv_kind / 2;
414
77.6k
  HWY_ALIGN float block[4 * 8] = {};
415
388k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.55M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.24M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.24M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.24M
    }
420
310k
  }
421
  // AFV coefficients in (even, even) positions.
422
77.6k
  HWY_ALIGN float coeff[4 * 4];
423
77.6k
  AFVDCT4x4(block, coeff);
424
388k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.55M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.24M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.24M
    }
428
310k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
77.6k
  ComputeScaledDCT<4, 4>()(
431
77.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
77.6k
              pixels_stride),
433
77.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
388k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.79M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.48M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.48M
    }
439
310k
  }
440
  // 4x8 DCT of the other half of the block.
441
77.6k
  ComputeScaledDCT<4, 8>()(
442
77.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
77.6k
      block, scratch_space);
444
388k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.79M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.48M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.48M
    }
448
310k
  }
449
77.6k
  float block00 = coefficients[0] * 0.25f;
450
77.6k
  float block01 = coefficients[1];
451
77.6k
  float block10 = coefficients[8];
452
77.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
77.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
77.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
77.6k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
101k
                            float* JXL_RESTRICT coefficients) {
411
101k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
101k
  size_t afv_x = afv_kind & 1;
413
101k
  size_t afv_y = afv_kind / 2;
414
101k
  HWY_ALIGN float block[4 * 8] = {};
415
509k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.03M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.62M
    }
420
407k
  }
421
  // AFV coefficients in (even, even) positions.
422
101k
  HWY_ALIGN float coeff[4 * 4];
423
101k
  AFVDCT4x4(block, coeff);
424
509k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.03M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.62M
    }
428
407k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
101k
  ComputeScaledDCT<4, 4>()(
431
101k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
101k
              pixels_stride),
433
101k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
509k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.66M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.25M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.25M
    }
439
407k
  }
440
  // 4x8 DCT of the other half of the block.
441
101k
  ComputeScaledDCT<4, 8>()(
442
101k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
101k
      block, scratch_space);
444
509k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.66M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.25M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.25M
    }
448
407k
  }
449
101k
  float block00 = coefficients[0] * 0.25f;
450
101k
  float block01 = coefficients[1];
451
101k
  float block10 = coefficients[8];
452
101k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
101k
  coefficients[1] = (block00 - block01) * 0.5f;
454
101k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
101k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
58.9k
                            float* JXL_RESTRICT coefficients) {
411
58.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
58.9k
  size_t afv_x = afv_kind & 1;
413
58.9k
  size_t afv_y = afv_kind / 2;
414
58.9k
  HWY_ALIGN float block[4 * 8] = {};
415
294k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.17M
    for (size_t ix = 0; ix < 4; ix++) {
417
942k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
942k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
942k
    }
420
235k
  }
421
  // AFV coefficients in (even, even) positions.
422
58.9k
  HWY_ALIGN float coeff[4 * 4];
423
58.9k
  AFVDCT4x4(block, coeff);
424
294k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.17M
    for (size_t ix = 0; ix < 4; ix++) {
426
942k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
942k
    }
428
235k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
58.9k
  ComputeScaledDCT<4, 4>()(
431
58.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
58.9k
              pixels_stride),
433
58.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
294k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.12M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.88M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.88M
    }
439
235k
  }
440
  // 4x8 DCT of the other half of the block.
441
58.9k
  ComputeScaledDCT<4, 8>()(
442
58.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
58.9k
      block, scratch_space);
444
294k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.12M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.88M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.88M
    }
448
235k
  }
449
58.9k
  float block00 = coefficients[0] * 0.25f;
450
58.9k
  float block01 = coefficients[1];
451
58.9k
  float block10 = coefficients[8];
452
58.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
58.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
58.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
58.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
72.1k
                            float* JXL_RESTRICT coefficients) {
411
72.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
72.1k
  size_t afv_x = afv_kind & 1;
413
72.1k
  size_t afv_y = afv_kind / 2;
414
72.1k
  HWY_ALIGN float block[4 * 8] = {};
415
360k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.44M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.15M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.15M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.15M
    }
420
288k
  }
421
  // AFV coefficients in (even, even) positions.
422
72.1k
  HWY_ALIGN float coeff[4 * 4];
423
72.1k
  AFVDCT4x4(block, coeff);
424
360k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.44M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.15M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.15M
    }
428
288k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
72.1k
  ComputeScaledDCT<4, 4>()(
431
72.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
72.1k
              pixels_stride),
433
72.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
360k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.59M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.30M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.30M
    }
439
288k
  }
440
  // 4x8 DCT of the other half of the block.
441
72.1k
  ComputeScaledDCT<4, 8>()(
442
72.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
72.1k
      block, scratch_space);
444
360k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.59M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.30M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.30M
    }
448
288k
  }
449
72.1k
  float block00 = coefficients[0] * 0.25f;
450
72.1k
  float block01 = coefficients[1];
451
72.1k
  float block10 = coefficients[8];
452
72.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
72.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
72.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
72.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
77.6k
                            float* JXL_RESTRICT coefficients) {
411
77.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
77.6k
  size_t afv_x = afv_kind & 1;
413
77.6k
  size_t afv_y = afv_kind / 2;
414
77.6k
  HWY_ALIGN float block[4 * 8] = {};
415
388k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.55M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.24M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.24M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.24M
    }
420
310k
  }
421
  // AFV coefficients in (even, even) positions.
422
77.6k
  HWY_ALIGN float coeff[4 * 4];
423
77.6k
  AFVDCT4x4(block, coeff);
424
388k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.55M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.24M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.24M
    }
428
310k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
77.6k
  ComputeScaledDCT<4, 4>()(
431
77.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
77.6k
              pixels_stride),
433
77.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
388k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.79M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.48M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.48M
    }
439
310k
  }
440
  // 4x8 DCT of the other half of the block.
441
77.6k
  ComputeScaledDCT<4, 8>()(
442
77.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
77.6k
      block, scratch_space);
444
388k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.79M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.48M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.48M
    }
448
310k
  }
449
77.6k
  float block00 = coefficients[0] * 0.25f;
450
77.6k
  float block01 = coefficients[1];
451
77.6k
  float block10 = coefficients[8];
452
77.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
77.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
77.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
77.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.2M
                            float* JXL_RESTRICT coefficients) {
411
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.2M
  size_t afv_x = afv_kind & 1;
413
13.2M
  size_t afv_y = afv_kind / 2;
414
13.2M
  HWY_ALIGN float block[4 * 8] = {};
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
211M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
211M
    }
420
52.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.2M
  HWY_ALIGN float coeff[4 * 4];
423
13.2M
  AFVDCT4x4(block, coeff);
424
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
264M
    for (size_t ix = 0; ix < 4; ix++) {
426
211M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
211M
    }
428
52.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.2M
  ComputeScaledDCT<4, 4>()(
431
13.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.2M
              pixels_stride),
433
13.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
475M
    for (size_t ix = 0; ix < 8; ix++) {
437
422M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
422M
    }
439
52.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.2M
  ComputeScaledDCT<4, 8>()(
442
13.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.2M
      block, scratch_space);
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
422M
    }
448
52.8M
  }
449
13.2M
  float block00 = coefficients[0] * 0.25f;
450
13.2M
  float block01 = coefficients[1];
451
13.2M
  float block10 = coefficients[8];
452
13.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.2M
                            float* JXL_RESTRICT coefficients) {
411
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.2M
  size_t afv_x = afv_kind & 1;
413
13.2M
  size_t afv_y = afv_kind / 2;
414
13.2M
  HWY_ALIGN float block[4 * 8] = {};
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
211M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
211M
    }
420
52.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.2M
  HWY_ALIGN float coeff[4 * 4];
423
13.2M
  AFVDCT4x4(block, coeff);
424
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
264M
    for (size_t ix = 0; ix < 4; ix++) {
426
211M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
211M
    }
428
52.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.2M
  ComputeScaledDCT<4, 4>()(
431
13.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.2M
              pixels_stride),
433
13.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
475M
    for (size_t ix = 0; ix < 8; ix++) {
437
422M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
422M
    }
439
52.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.2M
  ComputeScaledDCT<4, 8>()(
442
13.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.2M
      block, scratch_space);
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
422M
    }
448
52.8M
  }
449
13.2M
  float block00 = coefficients[0] * 0.25f;
450
13.2M
  float block01 = coefficients[1];
451
13.2M
  float block10 = coefficients[8];
452
13.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.2M
                            float* JXL_RESTRICT coefficients) {
411
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.2M
  size_t afv_x = afv_kind & 1;
413
13.2M
  size_t afv_y = afv_kind / 2;
414
13.2M
  HWY_ALIGN float block[4 * 8] = {};
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
211M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
211M
    }
420
52.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.2M
  HWY_ALIGN float coeff[4 * 4];
423
13.2M
  AFVDCT4x4(block, coeff);
424
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
264M
    for (size_t ix = 0; ix < 4; ix++) {
426
211M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
211M
    }
428
52.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.2M
  ComputeScaledDCT<4, 4>()(
431
13.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.2M
              pixels_stride),
433
13.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
475M
    for (size_t ix = 0; ix < 8; ix++) {
437
422M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
422M
    }
439
52.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.2M
  ComputeScaledDCT<4, 8>()(
442
13.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.2M
      block, scratch_space);
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
422M
    }
448
52.8M
  }
449
13.2M
  float block00 = coefficients[0] * 0.25f;
450
13.2M
  float block01 = coefficients[1];
451
13.2M
  float block10 = coefficients[8];
452
13.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.2M
                            float* JXL_RESTRICT coefficients) {
411
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.2M
  size_t afv_x = afv_kind & 1;
413
13.2M
  size_t afv_y = afv_kind / 2;
414
13.2M
  HWY_ALIGN float block[4 * 8] = {};
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
211M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
211M
    }
420
52.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.2M
  HWY_ALIGN float coeff[4 * 4];
423
13.2M
  AFVDCT4x4(block, coeff);
424
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
264M
    for (size_t ix = 0; ix < 4; ix++) {
426
211M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
211M
    }
428
52.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.2M
  ComputeScaledDCT<4, 4>()(
431
13.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.2M
              pixels_stride),
433
13.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
475M
    for (size_t ix = 0; ix < 8; ix++) {
437
422M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
422M
    }
439
52.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.2M
  ComputeScaledDCT<4, 8>()(
442
13.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.2M
      block, scratch_space);
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
422M
    }
448
52.8M
  }
449
13.2M
  float block00 = coefficients[0] * 0.25f;
450
13.2M
  float block01 = coefficients[1];
451
13.2M
  float block10 = coefficients[8];
452
13.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
189M
                                          float* JXL_RESTRICT scratch_space) {
462
189M
  using Type = AcStrategyType;
463
189M
  switch (strategy) {
464
14.6M
    case Type::IDENTITY: {
465
43.9M
      for (size_t y = 0; y < 2; y++) {
466
87.8M
        for (size_t x = 0; x < 2; x++) {
467
58.5M
          float block_dc = 0;
468
292M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.17G
            for (size_t ix = 0; ix < 4; ix++) {
470
936M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
936M
            }
472
234M
          }
473
58.5M
          block_dc *= 1.0f / 16;
474
292M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.17G
            for (size_t ix = 0; ix < 4; ix++) {
476
936M
              if (ix == 1 && iy == 1) continue;
477
878M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
878M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
878M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
878M
            }
481
234M
          }
482
58.5M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
58.5M
          coefficients[y * 8 + x] = block_dc;
484
58.5M
        }
485
29.2M
      }
486
14.6M
      float block00 = coefficients[0];
487
14.6M
      float block01 = coefficients[1];
488
14.6M
      float block10 = coefficients[8];
489
14.6M
      float block11 = coefficients[9];
490
14.6M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
14.6M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
14.6M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
14.6M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
14.6M
      break;
495
0
    }
496
13.5M
    case Type::DCT8X4: {
497
40.5M
      for (size_t x = 0; x < 2; x++) {
498
27.0M
        HWY_ALIGN float block[4 * 8];
499
27.0M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
27.0M
                                 scratch_space);
501
135M
        for (size_t iy = 0; iy < 4; iy++) {
502
972M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
864M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
864M
          }
506
108M
        }
507
27.0M
      }
508
13.5M
      float block0 = coefficients[0];
509
13.5M
      float block1 = coefficients[8];
510
13.5M
      coefficients[0] = (block0 + block1) * 0.5f;
511
13.5M
      coefficients[8] = (block0 - block1) * 0.5f;
512
13.5M
      break;
513
0
    }
514
13.3M
    case Type::DCT4X8: {
515
40.0M
      for (size_t y = 0; y < 2; y++) {
516
26.6M
        HWY_ALIGN float block[4 * 8];
517
26.6M
        ComputeScaledDCT<4, 8>()(
518
26.6M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
26.6M
            scratch_space);
520
133M
        for (size_t iy = 0; iy < 4; iy++) {
521
961M
          for (size_t ix = 0; ix < 8; ix++) {
522
854M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
854M
          }
524
106M
        }
525
26.6M
      }
526
13.3M
      float block0 = coefficients[0];
527
13.3M
      float block1 = coefficients[8];
528
13.3M
      coefficients[0] = (block0 + block1) * 0.5f;
529
13.3M
      coefficients[8] = (block0 - block1) * 0.5f;
530
13.3M
      break;
531
0
    }
532
13.2M
    case Type::DCT4X4: {
533
39.6M
      for (size_t y = 0; y < 2; y++) {
534
79.2M
        for (size_t x = 0; x < 2; x++) {
535
52.8M
          HWY_ALIGN float block[4 * 4];
536
52.8M
          ComputeScaledDCT<4, 4>()(
537
52.8M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
52.8M
              block, scratch_space);
539
264M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
541
845M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
845M
            }
543
211M
          }
544
52.8M
        }
545
26.4M
      }
546
13.2M
      float block00 = coefficients[0];
547
13.2M
      float block01 = coefficients[1];
548
13.2M
      float block10 = coefficients[8];
549
13.2M
      float block11 = coefficients[9];
550
13.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
13.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
13.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
13.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
13.2M
      break;
555
0
    }
556
17.3M
    case Type::DCT2X2: {
557
17.3M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
17.3M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
17.3M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
17.3M
      break;
561
0
    }
562
5.62M
    case Type::DCT16X16: {
563
5.62M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.62M
                                 scratch_space);
565
5.62M
      break;
566
0
    }
567
10.8M
    case Type::DCT16X8: {
568
10.8M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
10.8M
                                scratch_space);
570
10.8M
      break;
571
0
    }
572
10.9M
    case Type::DCT8X16: {
573
10.9M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
10.9M
                                scratch_space);
575
10.9M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.19M
    case Type::DCT32X16: {
588
2.19M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.19M
                                 scratch_space);
590
2.19M
      break;
591
0
    }
592
2.17M
    case Type::DCT16X32: {
593
2.17M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.17M
                                 scratch_space);
595
2.17M
      break;
596
0
    }
597
1.25M
    case Type::DCT32X32: {
598
1.25M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.25M
                                 scratch_space);
600
1.25M
      break;
601
0
    }
602
29.5M
    case Type::DCT: {
603
29.5M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
29.5M
                               scratch_space);
605
29.5M
      break;
606
0
    }
607
13.4M
    case Type::AFV0: {
608
13.4M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
13.4M
      break;
610
0
    }
611
13.3M
    case Type::AFV1: {
612
13.3M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
13.3M
      break;
614
0
    }
615
13.3M
    case Type::AFV2: {
616
13.3M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
13.3M
      break;
618
0
    }
619
13.3M
    case Type::AFV3: {
620
13.3M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
13.3M
      break;
622
0
    }
623
293k
    case Type::DCT64X64: {
624
293k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
293k
                                 scratch_space);
626
293k
      break;
627
0
    }
628
657k
    case Type::DCT64X32: {
629
657k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
657k
                                 scratch_space);
631
657k
      break;
632
0
    }
633
398k
    case Type::DCT32X64: {
634
398k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
398k
                                 scratch_space);
636
398k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
189M
  }
669
189M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
5.86M
                                          float* JXL_RESTRICT scratch_space) {
462
5.86M
  using Type = AcStrategyType;
463
5.86M
  switch (strategy) {
464
713k
    case Type::IDENTITY: {
465
2.14M
      for (size_t y = 0; y < 2; y++) {
466
4.28M
        for (size_t x = 0; x < 2; x++) {
467
2.85M
          float block_dc = 0;
468
14.2M
          for (size_t iy = 0; iy < 4; iy++) {
469
57.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
45.6M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
45.6M
            }
472
11.4M
          }
473
2.85M
          block_dc *= 1.0f / 16;
474
14.2M
          for (size_t iy = 0; iy < 4; iy++) {
475
57.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
45.6M
              if (ix == 1 && iy == 1) continue;
477
42.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
42.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
42.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
42.8M
            }
481
11.4M
          }
482
2.85M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.85M
          coefficients[y * 8 + x] = block_dc;
484
2.85M
        }
485
1.42M
      }
486
713k
      float block00 = coefficients[0];
487
713k
      float block01 = coefficients[1];
488
713k
      float block10 = coefficients[8];
489
713k
      float block11 = coefficients[9];
490
713k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
713k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
713k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
713k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
713k
      break;
495
0
    }
496
150k
    case Type::DCT8X4: {
497
451k
      for (size_t x = 0; x < 2; x++) {
498
300k
        HWY_ALIGN float block[4 * 8];
499
300k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
300k
                                 scratch_space);
501
1.50M
        for (size_t iy = 0; iy < 4; iy++) {
502
10.8M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
9.63M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
9.63M
          }
506
1.20M
        }
507
300k
      }
508
150k
      float block0 = coefficients[0];
509
150k
      float block1 = coefficients[8];
510
150k
      coefficients[0] = (block0 + block1) * 0.5f;
511
150k
      coefficients[8] = (block0 - block1) * 0.5f;
512
150k
      break;
513
0
    }
514
70.4k
    case Type::DCT4X8: {
515
211k
      for (size_t y = 0; y < 2; y++) {
516
140k
        HWY_ALIGN float block[4 * 8];
517
140k
        ComputeScaledDCT<4, 8>()(
518
140k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
140k
            scratch_space);
520
704k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.50M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.50M
          }
524
563k
        }
525
140k
      }
526
70.4k
      float block0 = coefficients[0];
527
70.4k
      float block1 = coefficients[8];
528
70.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
70.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
70.4k
      break;
531
0
    }
532
315
    case Type::DCT4X4: {
533
945
      for (size_t y = 0; y < 2; y++) {
534
1.89k
        for (size_t x = 0; x < 2; x++) {
535
1.26k
          HWY_ALIGN float block[4 * 4];
536
1.26k
          ComputeScaledDCT<4, 4>()(
537
1.26k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.26k
              block, scratch_space);
539
6.30k
          for (size_t iy = 0; iy < 4; iy++) {
540
25.2k
            for (size_t ix = 0; ix < 4; ix++) {
541
20.1k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
20.1k
            }
543
5.04k
          }
544
1.26k
        }
545
630
      }
546
315
      float block00 = coefficients[0];
547
315
      float block01 = coefficients[1];
548
315
      float block10 = coefficients[8];
549
315
      float block11 = coefficients[9];
550
315
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
315
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
315
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
315
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
315
      break;
555
0
    }
556
2.09M
    case Type::DCT2X2: {
557
2.09M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.09M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.09M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.09M
      break;
561
0
    }
562
172k
    case Type::DCT16X16: {
563
172k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
172k
                                 scratch_space);
565
172k
      break;
566
0
    }
567
234k
    case Type::DCT16X8: {
568
234k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
234k
                                scratch_space);
570
234k
      break;
571
0
    }
572
251k
    case Type::DCT8X16: {
573
251k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
251k
                                scratch_space);
575
251k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
55.6k
    case Type::DCT32X16: {
588
55.6k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
55.6k
                                 scratch_space);
590
55.6k
      break;
591
0
    }
592
57.7k
    case Type::DCT16X32: {
593
57.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
57.7k
                                 scratch_space);
595
57.7k
      break;
596
0
    }
597
98.4k
    case Type::DCT32X32: {
598
98.4k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
98.4k
                                 scratch_space);
600
98.4k
      break;
601
0
    }
602
1.58M
    case Type::DCT: {
603
1.58M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.58M
                               scratch_space);
605
1.58M
      break;
606
0
    }
607
101k
    case Type::AFV0: {
608
101k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
101k
      break;
610
0
    }
611
58.9k
    case Type::AFV1: {
612
58.9k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
58.9k
      break;
614
0
    }
615
72.1k
    case Type::AFV2: {
616
72.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
72.1k
      break;
618
0
    }
619
77.6k
    case Type::AFV3: {
620
77.6k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
77.6k
      break;
622
0
    }
623
55.4k
    case Type::DCT64X64: {
624
55.4k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
55.4k
                                 scratch_space);
626
55.4k
      break;
627
0
    }
628
12.0k
    case Type::DCT64X32: {
629
12.0k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.0k
                                 scratch_space);
631
12.0k
      break;
632
0
    }
633
6.50k
    case Type::DCT32X64: {
634
6.50k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.50k
                                 scratch_space);
636
6.50k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
5.86M
  }
669
5.86M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
19.0M
                                          float* JXL_RESTRICT scratch_space) {
462
19.0M
  using Type = AcStrategyType;
463
19.0M
  switch (strategy) {
464
713k
    case Type::IDENTITY: {
465
2.14M
      for (size_t y = 0; y < 2; y++) {
466
4.28M
        for (size_t x = 0; x < 2; x++) {
467
2.85M
          float block_dc = 0;
468
14.2M
          for (size_t iy = 0; iy < 4; iy++) {
469
57.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
45.6M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
45.6M
            }
472
11.4M
          }
473
2.85M
          block_dc *= 1.0f / 16;
474
14.2M
          for (size_t iy = 0; iy < 4; iy++) {
475
57.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
45.6M
              if (ix == 1 && iy == 1) continue;
477
42.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
42.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
42.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
42.8M
            }
481
11.4M
          }
482
2.85M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.85M
          coefficients[y * 8 + x] = block_dc;
484
2.85M
        }
485
1.42M
      }
486
713k
      float block00 = coefficients[0];
487
713k
      float block01 = coefficients[1];
488
713k
      float block10 = coefficients[8];
489
713k
      float block11 = coefficients[9];
490
713k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
713k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
713k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
713k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
713k
      break;
495
0
    }
496
150k
    case Type::DCT8X4: {
497
451k
      for (size_t x = 0; x < 2; x++) {
498
300k
        HWY_ALIGN float block[4 * 8];
499
300k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
300k
                                 scratch_space);
501
1.50M
        for (size_t iy = 0; iy < 4; iy++) {
502
10.8M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
9.63M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
9.63M
          }
506
1.20M
        }
507
300k
      }
508
150k
      float block0 = coefficients[0];
509
150k
      float block1 = coefficients[8];
510
150k
      coefficients[0] = (block0 + block1) * 0.5f;
511
150k
      coefficients[8] = (block0 - block1) * 0.5f;
512
150k
      break;
513
0
    }
514
70.4k
    case Type::DCT4X8: {
515
211k
      for (size_t y = 0; y < 2; y++) {
516
140k
        HWY_ALIGN float block[4 * 8];
517
140k
        ComputeScaledDCT<4, 8>()(
518
140k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
140k
            scratch_space);
520
704k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.50M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.50M
          }
524
563k
        }
525
140k
      }
526
70.4k
      float block0 = coefficients[0];
527
70.4k
      float block1 = coefficients[8];
528
70.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
70.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
70.4k
      break;
531
0
    }
532
315
    case Type::DCT4X4: {
533
945
      for (size_t y = 0; y < 2; y++) {
534
1.89k
        for (size_t x = 0; x < 2; x++) {
535
1.26k
          HWY_ALIGN float block[4 * 4];
536
1.26k
          ComputeScaledDCT<4, 4>()(
537
1.26k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.26k
              block, scratch_space);
539
6.30k
          for (size_t iy = 0; iy < 4; iy++) {
540
25.2k
            for (size_t ix = 0; ix < 4; ix++) {
541
20.1k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
20.1k
            }
543
5.04k
          }
544
1.26k
        }
545
630
      }
546
315
      float block00 = coefficients[0];
547
315
      float block01 = coefficients[1];
548
315
      float block10 = coefficients[8];
549
315
      float block11 = coefficients[9];
550
315
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
315
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
315
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
315
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
315
      break;
555
0
    }
556
2.09M
    case Type::DCT2X2: {
557
2.09M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.09M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.09M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.09M
      break;
561
0
    }
562
172k
    case Type::DCT16X16: {
563
172k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
172k
                                 scratch_space);
565
172k
      break;
566
0
    }
567
234k
    case Type::DCT16X8: {
568
234k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
234k
                                scratch_space);
570
234k
      break;
571
0
    }
572
251k
    case Type::DCT8X16: {
573
251k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
251k
                                scratch_space);
575
251k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
55.6k
    case Type::DCT32X16: {
588
55.6k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
55.6k
                                 scratch_space);
590
55.6k
      break;
591
0
    }
592
57.7k
    case Type::DCT16X32: {
593
57.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
57.7k
                                 scratch_space);
595
57.7k
      break;
596
0
    }
597
98.4k
    case Type::DCT32X32: {
598
98.4k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
98.4k
                                 scratch_space);
600
98.4k
      break;
601
0
    }
602
14.7M
    case Type::DCT: {
603
14.7M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
14.7M
                               scratch_space);
605
14.7M
      break;
606
0
    }
607
101k
    case Type::AFV0: {
608
101k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
101k
      break;
610
0
    }
611
58.9k
    case Type::AFV1: {
612
58.9k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
58.9k
      break;
614
0
    }
615
72.1k
    case Type::AFV2: {
616
72.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
72.1k
      break;
618
0
    }
619
77.6k
    case Type::AFV3: {
620
77.6k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
77.6k
      break;
622
0
    }
623
55.4k
    case Type::DCT64X64: {
624
55.4k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
55.4k
                                 scratch_space);
626
55.4k
      break;
627
0
    }
628
12.0k
    case Type::DCT64X32: {
629
12.0k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.0k
                                 scratch_space);
631
12.0k
      break;
632
0
    }
633
6.50k
    case Type::DCT32X64: {
634
6.50k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.50k
                                 scratch_space);
636
6.50k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
19.0M
  }
669
19.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
164M
                                          float* JXL_RESTRICT scratch_space) {
462
164M
  using Type = AcStrategyType;
463
164M
  switch (strategy) {
464
13.2M
    case Type::IDENTITY: {
465
39.6M
      for (size_t y = 0; y < 2; y++) {
466
79.2M
        for (size_t x = 0; x < 2; x++) {
467
52.8M
          float block_dc = 0;
468
264M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
470
845M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
845M
            }
472
211M
          }
473
52.8M
          block_dc *= 1.0f / 16;
474
264M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
476
845M
              if (ix == 1 && iy == 1) continue;
477
792M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
792M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
792M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
792M
            }
481
211M
          }
482
52.8M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
52.8M
          coefficients[y * 8 + x] = block_dc;
484
52.8M
        }
485
26.4M
      }
486
13.2M
      float block00 = coefficients[0];
487
13.2M
      float block01 = coefficients[1];
488
13.2M
      float block10 = coefficients[8];
489
13.2M
      float block11 = coefficients[9];
490
13.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
13.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
13.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
13.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
13.2M
      break;
495
0
    }
496
13.2M
    case Type::DCT8X4: {
497
39.6M
      for (size_t x = 0; x < 2; x++) {
498
26.4M
        HWY_ALIGN float block[4 * 8];
499
26.4M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
26.4M
                                 scratch_space);
501
132M
        for (size_t iy = 0; iy < 4; iy++) {
502
950M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
845M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
845M
          }
506
105M
        }
507
26.4M
      }
508
13.2M
      float block0 = coefficients[0];
509
13.2M
      float block1 = coefficients[8];
510
13.2M
      coefficients[0] = (block0 + block1) * 0.5f;
511
13.2M
      coefficients[8] = (block0 - block1) * 0.5f;
512
13.2M
      break;
513
0
    }
514
13.2M
    case Type::DCT4X8: {
515
39.6M
      for (size_t y = 0; y < 2; y++) {
516
26.4M
        HWY_ALIGN float block[4 * 8];
517
26.4M
        ComputeScaledDCT<4, 8>()(
518
26.4M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
26.4M
            scratch_space);
520
132M
        for (size_t iy = 0; iy < 4; iy++) {
521
950M
          for (size_t ix = 0; ix < 8; ix++) {
522
845M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
845M
          }
524
105M
        }
525
26.4M
      }
526
13.2M
      float block0 = coefficients[0];
527
13.2M
      float block1 = coefficients[8];
528
13.2M
      coefficients[0] = (block0 + block1) * 0.5f;
529
13.2M
      coefficients[8] = (block0 - block1) * 0.5f;
530
13.2M
      break;
531
0
    }
532
13.2M
    case Type::DCT4X4: {
533
39.6M
      for (size_t y = 0; y < 2; y++) {
534
79.2M
        for (size_t x = 0; x < 2; x++) {
535
52.8M
          HWY_ALIGN float block[4 * 4];
536
52.8M
          ComputeScaledDCT<4, 4>()(
537
52.8M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
52.8M
              block, scratch_space);
539
264M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
541
845M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
845M
            }
543
211M
          }
544
52.8M
        }
545
26.4M
      }
546
13.2M
      float block00 = coefficients[0];
547
13.2M
      float block01 = coefficients[1];
548
13.2M
      float block10 = coefficients[8];
549
13.2M
      float block11 = coefficients[9];
550
13.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
13.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
13.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
13.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
13.2M
      break;
555
0
    }
556
13.2M
    case Type::DCT2X2: {
557
13.2M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
13.2M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
13.2M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
13.2M
      break;
561
0
    }
562
5.27M
    case Type::DCT16X16: {
563
5.27M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.27M
                                 scratch_space);
565
5.27M
      break;
566
0
    }
567
10.4M
    case Type::DCT16X8: {
568
10.4M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
10.4M
                                scratch_space);
570
10.4M
      break;
571
0
    }
572
10.4M
    case Type::DCT8X16: {
573
10.4M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
10.4M
                                scratch_space);
575
10.4M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.08M
    case Type::DCT32X16: {
588
2.08M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.08M
                                 scratch_space);
590
2.08M
      break;
591
0
    }
592
2.06M
    case Type::DCT16X32: {
593
2.06M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.06M
                                 scratch_space);
595
2.06M
      break;
596
0
    }
597
1.05M
    case Type::DCT32X32: {
598
1.05M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.05M
                                 scratch_space);
600
1.05M
      break;
601
0
    }
602
13.2M
    case Type::DCT: {
603
13.2M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
13.2M
                               scratch_space);
605
13.2M
      break;
606
0
    }
607
13.2M
    case Type::AFV0: {
608
13.2M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
13.2M
      break;
610
0
    }
611
13.2M
    case Type::AFV1: {
612
13.2M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
13.2M
      break;
614
0
    }
615
13.2M
    case Type::AFV2: {
616
13.2M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
13.2M
      break;
618
0
    }
619
13.2M
    case Type::AFV3: {
620
13.2M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
13.2M
      break;
622
0
    }
623
182k
    case Type::DCT64X64: {
624
182k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
182k
                                 scratch_space);
626
182k
      break;
627
0
    }
628
633k
    case Type::DCT64X32: {
629
633k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
633k
                                 scratch_space);
631
633k
      break;
632
0
    }
633
384k
    case Type::DCT32X64: {
634
384k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
384k
                                 scratch_space);
636
384k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
164M
  }
669
164M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
24.9M
                                              float* scratch_space) {
676
24.9M
  using Type = AcStrategyType;
677
24.9M
  switch (strategy) {
678
469k
    case Type::DCT16X8: {
679
469k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
469k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
469k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
469k
      break;
683
0
    }
684
503k
    case Type::DCT8X16: {
685
503k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
503k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
503k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
503k
      break;
689
0
    }
690
345k
    case Type::DCT16X16: {
691
345k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
345k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
345k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
345k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
111k
    case Type::DCT32X16: {
709
111k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
111k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
111k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
111k
      break;
713
0
    }
714
115k
    case Type::DCT16X32: {
715
115k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
115k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
115k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
115k
      break;
719
0
    }
720
196k
    case Type::DCT32X32: {
721
196k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
196k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
196k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
196k
      break;
725
0
    }
726
24.1k
    case Type::DCT64X32: {
727
24.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
24.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
24.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
24.1k
      break;
731
0
    }
732
13.0k
    case Type::DCT32X64: {
733
13.0k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
13.0k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
13.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
13.0k
      break;
737
0
    }
738
110k
    case Type::DCT64X64: {
739
110k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
110k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
110k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
110k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
16.3M
    case Type::DCT:
787
20.5M
    case Type::DCT2X2:
788
20.5M
    case Type::DCT4X4:
789
20.6M
    case Type::DCT4X8:
790
21.0M
    case Type::DCT8X4:
791
21.2M
    case Type::AFV0:
792
21.3M
    case Type::AFV1:
793
21.4M
    case Type::AFV2:
794
21.6M
    case Type::AFV3:
795
23.0M
    case Type::IDENTITY:
796
23.0M
      dc[0] = block[0];
797
23.0M
      break;
798
24.9M
  }
799
24.9M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
5.86M
                                              float* scratch_space) {
676
5.86M
  using Type = AcStrategyType;
677
5.86M
  switch (strategy) {
678
234k
    case Type::DCT16X8: {
679
234k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
234k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
234k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
234k
      break;
683
0
    }
684
251k
    case Type::DCT8X16: {
685
251k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
251k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
251k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
251k
      break;
689
0
    }
690
172k
    case Type::DCT16X16: {
691
172k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
172k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
172k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
172k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
55.6k
    case Type::DCT32X16: {
709
55.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
55.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
55.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
55.6k
      break;
713
0
    }
714
57.7k
    case Type::DCT16X32: {
715
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
57.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
57.7k
      break;
719
0
    }
720
98.4k
    case Type::DCT32X32: {
721
98.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
98.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
98.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
98.4k
      break;
725
0
    }
726
12.0k
    case Type::DCT64X32: {
727
12.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.0k
      break;
731
0
    }
732
6.50k
    case Type::DCT32X64: {
733
6.50k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.50k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.50k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.50k
      break;
737
0
    }
738
55.4k
    case Type::DCT64X64: {
739
55.4k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
55.4k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
55.4k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
55.4k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.58M
    case Type::DCT:
787
3.67M
    case Type::DCT2X2:
788
3.67M
    case Type::DCT4X4:
789
3.74M
    case Type::DCT4X8:
790
3.89M
    case Type::DCT8X4:
791
3.99M
    case Type::AFV0:
792
4.05M
    case Type::AFV1:
793
4.12M
    case Type::AFV2:
794
4.20M
    case Type::AFV3:
795
4.92M
    case Type::IDENTITY:
796
4.92M
      dc[0] = block[0];
797
4.92M
      break;
798
5.86M
  }
799
5.86M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
19.0M
                                              float* scratch_space) {
676
19.0M
  using Type = AcStrategyType;
677
19.0M
  switch (strategy) {
678
234k
    case Type::DCT16X8: {
679
234k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
234k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
234k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
234k
      break;
683
0
    }
684
251k
    case Type::DCT8X16: {
685
251k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
251k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
251k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
251k
      break;
689
0
    }
690
172k
    case Type::DCT16X16: {
691
172k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
172k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
172k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
172k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
55.6k
    case Type::DCT32X16: {
709
55.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
55.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
55.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
55.6k
      break;
713
0
    }
714
57.7k
    case Type::DCT16X32: {
715
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
57.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
57.7k
      break;
719
0
    }
720
98.4k
    case Type::DCT32X32: {
721
98.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
98.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
98.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
98.4k
      break;
725
0
    }
726
12.0k
    case Type::DCT64X32: {
727
12.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.0k
      break;
731
0
    }
732
6.50k
    case Type::DCT32X64: {
733
6.50k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.50k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.50k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.50k
      break;
737
0
    }
738
55.4k
    case Type::DCT64X64: {
739
55.4k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
55.4k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
55.4k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
55.4k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
14.7M
    case Type::DCT:
787
16.8M
    case Type::DCT2X2:
788
16.8M
    case Type::DCT4X4:
789
16.9M
    case Type::DCT4X8:
790
17.1M
    case Type::DCT8X4:
791
17.2M
    case Type::AFV0:
792
17.2M
    case Type::AFV1:
793
17.3M
    case Type::AFV2:
794
17.4M
    case Type::AFV3:
795
18.1M
    case Type::IDENTITY:
796
18.1M
      dc[0] = block[0];
797
18.1M
      break;
798
19.0M
  }
799
19.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_