Coverage Report

Created: 2025-07-16 07:53

/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
7
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
8
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
9
#else
10
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
11
#endif
12
13
#include <cstddef>
14
#include <hwy/highway.h>
15
16
#include "lib/jxl/ac_strategy.h"
17
#include "lib/jxl/dct-inl.h"
18
#include "lib/jxl/dct_scales.h"
19
HWY_BEFORE_NAMESPACE();
20
namespace jxl {
21
namespace HWY_NAMESPACE {
22
namespace {
23
24
// These templates are not found via ADL.
25
using hwy::HWY_NAMESPACE::MulAdd;
26
27
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
28
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
29
// input block.
30
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
31
          size_t ROWS, size_t COLS>
32
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
33
                                  float* output, const size_t output_stride,
34
                                  float* JXL_RESTRICT block,
35
45.3k
                                  float* JXL_RESTRICT scratch_space) {
36
45.3k
  static_assert(LF_ROWS == ROWS,
37
45.3k
                "ReinterpretingDCT should only be called with LF == N");
38
45.3k
  static_assert(LF_COLS == COLS,
39
45.3k
                "ReinterpretingDCT should only be called with LF == N");
40
45.3k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
41
45.3k
                                 scratch_space);
42
45.3k
  if (ROWS < COLS) {
43
90.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
44
136k
      for (size_t x = 0; x < LF_COLS; x++) {
45
90.7k
        output[y * output_stride + x] =
46
90.7k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
47
90.7k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
48
90.7k
      }
49
45.3k
    }
50
45.3k
  } else {
51
0
    for (size_t y = 0; y < LF_COLS; y++) {
52
0
      for (size_t x = 0; x < LF_ROWS; x++) {
53
0
        output[y * output_stride + x] =
54
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
55
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
56
0
      }
57
0
    }
58
0
  }
59
45.3k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
35
45.3k
                                  float* JXL_RESTRICT scratch_space) {
36
45.3k
  static_assert(LF_ROWS == ROWS,
37
45.3k
                "ReinterpretingDCT should only be called with LF == N");
38
45.3k
  static_assert(LF_COLS == COLS,
39
45.3k
                "ReinterpretingDCT should only be called with LF == N");
40
45.3k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
41
45.3k
                                 scratch_space);
42
45.3k
  if (ROWS < COLS) {
43
90.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
44
136k
      for (size_t x = 0; x < LF_COLS; x++) {
45
90.7k
        output[y * output_stride + x] =
46
90.7k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
47
90.7k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
48
90.7k
      }
49
45.3k
    }
50
45.3k
  } else {
51
0
    for (size_t y = 0; y < LF_COLS; y++) {
52
0
      for (size_t x = 0; x < LF_ROWS; x++) {
53
0
        output[y * output_stride + x] =
54
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
55
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
56
0
      }
57
0
    }
58
0
  }
59
45.3k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
60
61
template <size_t S>
62
288
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
63
288
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
64
288
  static_assert(S % 2 == 0, "S should be even");
65
288
  float temp[kDCTBlockSize];
66
288
  constexpr size_t num_2x2 = S / 2;
67
960
  for (size_t y = 0; y < num_2x2; y++) {
68
2.68k
    for (size_t x = 0; x < num_2x2; x++) {
69
2.01k
      float c00 = block[y * kBlockDim + x];
70
2.01k
      float c01 = block[y * kBlockDim + num_2x2 + x];
71
2.01k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
72
2.01k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
73
2.01k
      float r00 = c00 + c01 + c10 + c11;
74
2.01k
      float r01 = c00 + c01 - c10 - c11;
75
2.01k
      float r10 = c00 - c01 + c10 - c11;
76
2.01k
      float r11 = c00 - c01 - c10 + c11;
77
2.01k
      temp[y * 2 * kBlockDim + x * 2] = r00;
78
2.01k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
79
2.01k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
80
2.01k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
81
2.01k
    }
82
672
  }
83
1.63k
  for (size_t y = 0; y < S; y++) {
84
9.40k
    for (size_t x = 0; x < S; x++) {
85
8.06k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
86
8.06k
    }
87
1.34k
  }
88
288
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
62
96
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
63
96
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
64
96
  static_assert(S % 2 == 0, "S should be even");
65
96
  float temp[kDCTBlockSize];
66
96
  constexpr size_t num_2x2 = S / 2;
67
192
  for (size_t y = 0; y < num_2x2; y++) {
68
192
    for (size_t x = 0; x < num_2x2; x++) {
69
96
      float c00 = block[y * kBlockDim + x];
70
96
      float c01 = block[y * kBlockDim + num_2x2 + x];
71
96
      float c10 = block[(y + num_2x2) * kBlockDim + x];
72
96
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
73
96
      float r00 = c00 + c01 + c10 + c11;
74
96
      float r01 = c00 + c01 - c10 - c11;
75
96
      float r10 = c00 - c01 + c10 - c11;
76
96
      float r11 = c00 - c01 - c10 + c11;
77
96
      temp[y * 2 * kBlockDim + x * 2] = r00;
78
96
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
79
96
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
80
96
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
81
96
    }
82
96
  }
83
288
  for (size_t y = 0; y < S; y++) {
84
576
    for (size_t x = 0; x < S; x++) {
85
384
      out[y * stride_out + x] = temp[y * kBlockDim + x];
86
384
    }
87
192
  }
88
96
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
62
96
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
63
96
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
64
96
  static_assert(S % 2 == 0, "S should be even");
65
96
  float temp[kDCTBlockSize];
66
96
  constexpr size_t num_2x2 = S / 2;
67
288
  for (size_t y = 0; y < num_2x2; y++) {
68
576
    for (size_t x = 0; x < num_2x2; x++) {
69
384
      float c00 = block[y * kBlockDim + x];
70
384
      float c01 = block[y * kBlockDim + num_2x2 + x];
71
384
      float c10 = block[(y + num_2x2) * kBlockDim + x];
72
384
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
73
384
      float r00 = c00 + c01 + c10 + c11;
74
384
      float r01 = c00 + c01 - c10 - c11;
75
384
      float r10 = c00 - c01 + c10 - c11;
76
384
      float r11 = c00 - c01 - c10 + c11;
77
384
      temp[y * 2 * kBlockDim + x * 2] = r00;
78
384
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
79
384
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
80
384
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
81
384
    }
82
192
  }
83
480
  for (size_t y = 0; y < S; y++) {
84
1.92k
    for (size_t x = 0; x < S; x++) {
85
1.53k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
86
1.53k
    }
87
384
  }
88
96
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
62
96
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
63
96
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
64
96
  static_assert(S % 2 == 0, "S should be even");
65
96
  float temp[kDCTBlockSize];
66
96
  constexpr size_t num_2x2 = S / 2;
67
480
  for (size_t y = 0; y < num_2x2; y++) {
68
1.92k
    for (size_t x = 0; x < num_2x2; x++) {
69
1.53k
      float c00 = block[y * kBlockDim + x];
70
1.53k
      float c01 = block[y * kBlockDim + num_2x2 + x];
71
1.53k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
72
1.53k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
73
1.53k
      float r00 = c00 + c01 + c10 + c11;
74
1.53k
      float r01 = c00 + c01 - c10 - c11;
75
1.53k
      float r10 = c00 - c01 + c10 - c11;
76
1.53k
      float r11 = c00 - c01 - c10 + c11;
77
1.53k
      temp[y * 2 * kBlockDim + x * 2] = r00;
78
1.53k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
79
1.53k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
80
1.53k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
81
1.53k
    }
82
384
  }
83
864
  for (size_t y = 0; y < S; y++) {
84
6.91k
    for (size_t x = 0; x < S; x++) {
85
6.14k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
86
6.14k
    }
87
768
  }
88
96
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
89
90
9.06k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
91
9.06k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
92
9.06k
      {
93
9.06k
          0.25,
94
9.06k
          0.25,
95
9.06k
          0.25,
96
9.06k
          0.25,
97
9.06k
          0.25,
98
9.06k
          0.25,
99
9.06k
          0.25,
100
9.06k
          0.25,
101
9.06k
          0.25,
102
9.06k
          0.25,
103
9.06k
          0.25,
104
9.06k
          0.25,
105
9.06k
          0.25,
106
9.06k
          0.25,
107
9.06k
          0.25,
108
9.06k
          0.25,
109
9.06k
      },
110
9.06k
      {
111
9.06k
          0.876902929799142f,
112
9.06k
          0.2206518106944235f,
113
9.06k
          -0.10140050393753763f,
114
9.06k
          -0.1014005039375375f,
115
9.06k
          0.2206518106944236f,
116
9.06k
          -0.10140050393753777f,
117
9.06k
          -0.10140050393753772f,
118
9.06k
          -0.10140050393753763f,
119
9.06k
          -0.10140050393753758f,
120
9.06k
          -0.10140050393753769f,
121
9.06k
          -0.1014005039375375f,
122
9.06k
          -0.10140050393753768f,
123
9.06k
          -0.10140050393753768f,
124
9.06k
          -0.10140050393753759f,
125
9.06k
          -0.10140050393753763f,
126
9.06k
          -0.10140050393753741f,
127
9.06k
      },
128
9.06k
      {
129
9.06k
          0.0,
130
9.06k
          0.0,
131
9.06k
          0.40670075830260755f,
132
9.06k
          0.44444816619734445f,
133
9.06k
          0.0,
134
9.06k
          0.0,
135
9.06k
          0.19574399372042936f,
136
9.06k
          0.2929100136981264f,
137
9.06k
          -0.40670075830260716f,
138
9.06k
          -0.19574399372042872f,
139
9.06k
          0.0,
140
9.06k
          0.11379074460448091f,
141
9.06k
          -0.44444816619734384f,
142
9.06k
          -0.29291001369812636f,
143
9.06k
          -0.1137907446044814f,
144
9.06k
          0.0,
145
9.06k
      },
146
9.06k
      {
147
9.06k
          0.0,
148
9.06k
          0.0,
149
9.06k
          -0.21255748058288748f,
150
9.06k
          0.3085497062849767f,
151
9.06k
          0.0,
152
9.06k
          0.4706702258572536f,
153
9.06k
          -0.1621205195722993f,
154
9.06k
          0.0,
155
9.06k
          -0.21255748058287047f,
156
9.06k
          -0.16212051957228327f,
157
9.06k
          -0.47067022585725277f,
158
9.06k
          -0.1464291867126764f,
159
9.06k
          0.3085497062849487f,
160
9.06k
          0.0,
161
9.06k
          -0.14642918671266536f,
162
9.06k
          0.4251149611657548f,
163
9.06k
      },
164
9.06k
      {
165
9.06k
          0.0,
166
9.06k
          -0.7071067811865474f,
167
9.06k
          0.0,
168
9.06k
          0.0,
169
9.06k
          0.7071067811865476f,
170
9.06k
          0.0,
171
9.06k
          0.0,
172
9.06k
          0.0,
173
9.06k
          0.0,
174
9.06k
          0.0,
175
9.06k
          0.0,
176
9.06k
          0.0,
177
9.06k
          0.0,
178
9.06k
          0.0,
179
9.06k
          0.0,
180
9.06k
          0.0,
181
9.06k
      },
182
9.06k
      {
183
9.06k
          -0.4105377591765233f,
184
9.06k
          0.6235485373547691f,
185
9.06k
          -0.06435071657946274f,
186
9.06k
          -0.06435071657946266f,
187
9.06k
          0.6235485373547694f,
188
9.06k
          -0.06435071657946284f,
189
9.06k
          -0.0643507165794628f,
190
9.06k
          -0.06435071657946274f,
191
9.06k
          -0.06435071657946272f,
192
9.06k
          -0.06435071657946279f,
193
9.06k
          -0.06435071657946266f,
194
9.06k
          -0.06435071657946277f,
195
9.06k
          -0.06435071657946277f,
196
9.06k
          -0.06435071657946273f,
197
9.06k
          -0.06435071657946274f,
198
9.06k
          -0.0643507165794626f,
199
9.06k
      },
200
9.06k
      {
201
9.06k
          0.0,
202
9.06k
          0.0,
203
9.06k
          -0.4517556589999482f,
204
9.06k
          0.15854503551840063f,
205
9.06k
          0.0,
206
9.06k
          -0.04038515160822202f,
207
9.06k
          0.0074182263792423875f,
208
9.06k
          0.39351034269210167f,
209
9.06k
          -0.45175565899994635f,
210
9.06k
          0.007418226379244351f,
211
9.06k
          0.1107416575309343f,
212
9.06k
          0.08298163094882051f,
213
9.06k
          0.15854503551839705f,
214
9.06k
          0.3935103426921022f,
215
9.06k
          0.0829816309488214f,
216
9.06k
          -0.45175565899994796f,
217
9.06k
      },
218
9.06k
      {
219
9.06k
          0.0,
220
9.06k
          0.0,
221
9.06k
          -0.304684750724869f,
222
9.06k
          0.5112616136591823f,
223
9.06k
          0.0,
224
9.06k
          0.0,
225
9.06k
          -0.290480129728998f,
226
9.06k
          -0.06578701549142804f,
227
9.06k
          0.304684750724884f,
228
9.06k
          0.2904801297290076f,
229
9.06k
          0.0,
230
9.06k
          -0.23889773523344604f,
231
9.06k
          -0.5112616136592012f,
232
9.06k
          0.06578701549142545f,
233
9.06k
          0.23889773523345467f,
234
9.06k
          0.0,
235
9.06k
      },
236
9.06k
      {
237
9.06k
          0.0,
238
9.06k
          0.0,
239
9.06k
          0.3017929516615495f,
240
9.06k
          0.25792362796341184f,
241
9.06k
          0.0,
242
9.06k
          0.16272340142866204f,
243
9.06k
          0.09520022653475037f,
244
9.06k
          0.0,
245
9.06k
          0.3017929516615503f,
246
9.06k
          0.09520022653475055f,
247
9.06k
          -0.16272340142866173f,
248
9.06k
          -0.35312385449816297f,
249
9.06k
          0.25792362796341295f,
250
9.06k
          0.0,
251
9.06k
          -0.3531238544981624f,
252
9.06k
          -0.6035859033230976f,
253
9.06k
      },
254
9.06k
      {
255
9.06k
          0.0,
256
9.06k
          0.0,
257
9.06k
          0.40824829046386274f,
258
9.06k
          0.0,
259
9.06k
          0.0,
260
9.06k
          0.0,
261
9.06k
          0.0,
262
9.06k
          -0.4082482904638628f,
263
9.06k
          -0.4082482904638635f,
264
9.06k
          0.0,
265
9.06k
          0.0,
266
9.06k
          -0.40824829046386296f,
267
9.06k
          0.0,
268
9.06k
          0.4082482904638634f,
269
9.06k
          0.408248290463863f,
270
9.06k
          0.0,
271
9.06k
      },
272
9.06k
      {
273
9.06k
          0.0,
274
9.06k
          0.0,
275
9.06k
          0.1747866975480809f,
276
9.06k
          0.0812611176717539f,
277
9.06k
          0.0,
278
9.06k
          0.0,
279
9.06k
          -0.3675398009862027f,
280
9.06k
          -0.307882213957909f,
281
9.06k
          -0.17478669754808135f,
282
9.06k
          0.3675398009862011f,
283
9.06k
          0.0,
284
9.06k
          0.4826689115059883f,
285
9.06k
          -0.08126111767175039f,
286
9.06k
          0.30788221395790305f,
287
9.06k
          -0.48266891150598584f,
288
9.06k
          0.0,
289
9.06k
      },
290
9.06k
      {
291
9.06k
          0.0,
292
9.06k
          0.0,
293
9.06k
          -0.21105601049335784f,
294
9.06k
          0.18567180916109802f,
295
9.06k
          0.0,
296
9.06k
          0.0,
297
9.06k
          0.49215859013738733f,
298
9.06k
          -0.38525013709251915f,
299
9.06k
          0.21105601049335806f,
300
9.06k
          -0.49215859013738905f,
301
9.06k
          0.0,
302
9.06k
          0.17419412659916217f,
303
9.06k
          -0.18567180916109904f,
304
9.06k
          0.3852501370925211f,
305
9.06k
          -0.1741941265991621f,
306
9.06k
          0.0,
307
9.06k
      },
308
9.06k
      {
309
9.06k
          0.0,
310
9.06k
          0.0,
311
9.06k
          -0.14266084808807264f,
312
9.06k
          -0.3416446842253372f,
313
9.06k
          0.0,
314
9.06k
          0.7367497537172237f,
315
9.06k
          0.24627107722075148f,
316
9.06k
          -0.08574019035519306f,
317
9.06k
          -0.14266084808807344f,
318
9.06k
          0.24627107722075137f,
319
9.06k
          0.14883399227113567f,
320
9.06k
          -0.04768680350229251f,
321
9.06k
          -0.3416446842253373f,
322
9.06k
          -0.08574019035519267f,
323
9.06k
          -0.047686803502292804f,
324
9.06k
          -0.14266084808807242f,
325
9.06k
      },
326
9.06k
      {
327
9.06k
          0.0,
328
9.06k
          0.0,
329
9.06k
          -0.13813540350758585f,
330
9.06k
          0.3302282550303788f,
331
9.06k
          0.0,
332
9.06k
          0.08755115000587084f,
333
9.06k
          -0.07946706605909573f,
334
9.06k
          -0.4613374887461511f,
335
9.06k
          -0.13813540350758294f,
336
9.06k
          -0.07946706605910261f,
337
9.06k
          0.49724647109535086f,
338
9.06k
          0.12538059448563663f,
339
9.06k
          0.3302282550303805f,
340
9.06k
          -0.4613374887461554f,
341
9.06k
          0.12538059448564315f,
342
9.06k
          -0.13813540350758452f,
343
9.06k
      },
344
9.06k
      {
345
9.06k
          0.0,
346
9.06k
          0.0,
347
9.06k
          -0.17437602599651067f,
348
9.06k
          0.0702790691196284f,
349
9.06k
          0.0,
350
9.06k
          -0.2921026642334881f,
351
9.06k
          0.3623817333531167f,
352
9.06k
          0.0,
353
9.06k
          -0.1743760259965108f,
354
9.06k
          0.36238173335311646f,
355
9.06k
          0.29210266423348785f,
356
9.06k
          -0.4326608024727445f,
357
9.06k
          0.07027906911962818f,
358
9.06k
          0.0,
359
9.06k
          -0.4326608024727457f,
360
9.06k
          0.34875205199302267f,
361
9.06k
      },
362
9.06k
      {
363
9.06k
          0.0,
364
9.06k
          0.0,
365
9.06k
          0.11354987314994337f,
366
9.06k
          -0.07417504595810355f,
367
9.06k
          0.0,
368
9.06k
          0.19402893032594343f,
369
9.06k
          -0.435190496523228f,
370
9.06k
          0.21918684838857466f,
371
9.06k
          0.11354987314994257f,
372
9.06k
          -0.4351904965232251f,
373
9.06k
          0.5550443808910661f,
374
9.06k
          -0.25468277124066463f,
375
9.06k
          -0.07417504595810233f,
376
9.06k
          0.2191868483885728f,
377
9.06k
          -0.25468277124066413f,
378
9.06k
          0.1135498731499429f,
379
9.06k
      },
380
9.06k
  };
381
382
9.06k
  const HWY_CAPPED(float, 16) d;
383
154k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
384
144k
    auto pixel = Zero(d);
385
2.46M
    for (size_t j = 0; j < 16; j++) {
386
2.31M
      auto cf = Set(d, coeffs[j]);
387
2.31M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
388
2.31M
      pixel = MulAdd(cf, basis, pixel);
389
2.31M
    }
390
144k
    Store(pixel, d, pixels + i);
391
144k
  }
392
9.06k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
90
9.06k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
91
9.06k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
92
9.06k
      {
93
9.06k
          0.25,
94
9.06k
          0.25,
95
9.06k
          0.25,
96
9.06k
          0.25,
97
9.06k
          0.25,
98
9.06k
          0.25,
99
9.06k
          0.25,
100
9.06k
          0.25,
101
9.06k
          0.25,
102
9.06k
          0.25,
103
9.06k
          0.25,
104
9.06k
          0.25,
105
9.06k
          0.25,
106
9.06k
          0.25,
107
9.06k
          0.25,
108
9.06k
          0.25,
109
9.06k
      },
110
9.06k
      {
111
9.06k
          0.876902929799142f,
112
9.06k
          0.2206518106944235f,
113
9.06k
          -0.10140050393753763f,
114
9.06k
          -0.1014005039375375f,
115
9.06k
          0.2206518106944236f,
116
9.06k
          -0.10140050393753777f,
117
9.06k
          -0.10140050393753772f,
118
9.06k
          -0.10140050393753763f,
119
9.06k
          -0.10140050393753758f,
120
9.06k
          -0.10140050393753769f,
121
9.06k
          -0.1014005039375375f,
122
9.06k
          -0.10140050393753768f,
123
9.06k
          -0.10140050393753768f,
124
9.06k
          -0.10140050393753759f,
125
9.06k
          -0.10140050393753763f,
126
9.06k
          -0.10140050393753741f,
127
9.06k
      },
128
9.06k
      {
129
9.06k
          0.0,
130
9.06k
          0.0,
131
9.06k
          0.40670075830260755f,
132
9.06k
          0.44444816619734445f,
133
9.06k
          0.0,
134
9.06k
          0.0,
135
9.06k
          0.19574399372042936f,
136
9.06k
          0.2929100136981264f,
137
9.06k
          -0.40670075830260716f,
138
9.06k
          -0.19574399372042872f,
139
9.06k
          0.0,
140
9.06k
          0.11379074460448091f,
141
9.06k
          -0.44444816619734384f,
142
9.06k
          -0.29291001369812636f,
143
9.06k
          -0.1137907446044814f,
144
9.06k
          0.0,
145
9.06k
      },
146
9.06k
      {
147
9.06k
          0.0,
148
9.06k
          0.0,
149
9.06k
          -0.21255748058288748f,
150
9.06k
          0.3085497062849767f,
151
9.06k
          0.0,
152
9.06k
          0.4706702258572536f,
153
9.06k
          -0.1621205195722993f,
154
9.06k
          0.0,
155
9.06k
          -0.21255748058287047f,
156
9.06k
          -0.16212051957228327f,
157
9.06k
          -0.47067022585725277f,
158
9.06k
          -0.1464291867126764f,
159
9.06k
          0.3085497062849487f,
160
9.06k
          0.0,
161
9.06k
          -0.14642918671266536f,
162
9.06k
          0.4251149611657548f,
163
9.06k
      },
164
9.06k
      {
165
9.06k
          0.0,
166
9.06k
          -0.7071067811865474f,
167
9.06k
          0.0,
168
9.06k
          0.0,
169
9.06k
          0.7071067811865476f,
170
9.06k
          0.0,
171
9.06k
          0.0,
172
9.06k
          0.0,
173
9.06k
          0.0,
174
9.06k
          0.0,
175
9.06k
          0.0,
176
9.06k
          0.0,
177
9.06k
          0.0,
178
9.06k
          0.0,
179
9.06k
          0.0,
180
9.06k
          0.0,
181
9.06k
      },
182
9.06k
      {
183
9.06k
          -0.4105377591765233f,
184
9.06k
          0.6235485373547691f,
185
9.06k
          -0.06435071657946274f,
186
9.06k
          -0.06435071657946266f,
187
9.06k
          0.6235485373547694f,
188
9.06k
          -0.06435071657946284f,
189
9.06k
          -0.0643507165794628f,
190
9.06k
          -0.06435071657946274f,
191
9.06k
          -0.06435071657946272f,
192
9.06k
          -0.06435071657946279f,
193
9.06k
          -0.06435071657946266f,
194
9.06k
          -0.06435071657946277f,
195
9.06k
          -0.06435071657946277f,
196
9.06k
          -0.06435071657946273f,
197
9.06k
          -0.06435071657946274f,
198
9.06k
          -0.0643507165794626f,
199
9.06k
      },
200
9.06k
      {
201
9.06k
          0.0,
202
9.06k
          0.0,
203
9.06k
          -0.4517556589999482f,
204
9.06k
          0.15854503551840063f,
205
9.06k
          0.0,
206
9.06k
          -0.04038515160822202f,
207
9.06k
          0.0074182263792423875f,
208
9.06k
          0.39351034269210167f,
209
9.06k
          -0.45175565899994635f,
210
9.06k
          0.007418226379244351f,
211
9.06k
          0.1107416575309343f,
212
9.06k
          0.08298163094882051f,
213
9.06k
          0.15854503551839705f,
214
9.06k
          0.3935103426921022f,
215
9.06k
          0.0829816309488214f,
216
9.06k
          -0.45175565899994796f,
217
9.06k
      },
218
9.06k
      {
219
9.06k
          0.0,
220
9.06k
          0.0,
221
9.06k
          -0.304684750724869f,
222
9.06k
          0.5112616136591823f,
223
9.06k
          0.0,
224
9.06k
          0.0,
225
9.06k
          -0.290480129728998f,
226
9.06k
          -0.06578701549142804f,
227
9.06k
          0.304684750724884f,
228
9.06k
          0.2904801297290076f,
229
9.06k
          0.0,
230
9.06k
          -0.23889773523344604f,
231
9.06k
          -0.5112616136592012f,
232
9.06k
          0.06578701549142545f,
233
9.06k
          0.23889773523345467f,
234
9.06k
          0.0,
235
9.06k
      },
236
9.06k
      {
237
9.06k
          0.0,
238
9.06k
          0.0,
239
9.06k
          0.3017929516615495f,
240
9.06k
          0.25792362796341184f,
241
9.06k
          0.0,
242
9.06k
          0.16272340142866204f,
243
9.06k
          0.09520022653475037f,
244
9.06k
          0.0,
245
9.06k
          0.3017929516615503f,
246
9.06k
          0.09520022653475055f,
247
9.06k
          -0.16272340142866173f,
248
9.06k
          -0.35312385449816297f,
249
9.06k
          0.25792362796341295f,
250
9.06k
          0.0,
251
9.06k
          -0.3531238544981624f,
252
9.06k
          -0.6035859033230976f,
253
9.06k
      },
254
9.06k
      {
255
9.06k
          0.0,
256
9.06k
          0.0,
257
9.06k
          0.40824829046386274f,
258
9.06k
          0.0,
259
9.06k
          0.0,
260
9.06k
          0.0,
261
9.06k
          0.0,
262
9.06k
          -0.4082482904638628f,
263
9.06k
          -0.4082482904638635f,
264
9.06k
          0.0,
265
9.06k
          0.0,
266
9.06k
          -0.40824829046386296f,
267
9.06k
          0.0,
268
9.06k
          0.4082482904638634f,
269
9.06k
          0.408248290463863f,
270
9.06k
          0.0,
271
9.06k
      },
272
9.06k
      {
273
9.06k
          0.0,
274
9.06k
          0.0,
275
9.06k
          0.1747866975480809f,
276
9.06k
          0.0812611176717539f,
277
9.06k
          0.0,
278
9.06k
          0.0,
279
9.06k
          -0.3675398009862027f,
280
9.06k
          -0.307882213957909f,
281
9.06k
          -0.17478669754808135f,
282
9.06k
          0.3675398009862011f,
283
9.06k
          0.0,
284
9.06k
          0.4826689115059883f,
285
9.06k
          -0.08126111767175039f,
286
9.06k
          0.30788221395790305f,
287
9.06k
          -0.48266891150598584f,
288
9.06k
          0.0,
289
9.06k
      },
290
9.06k
      {
291
9.06k
          0.0,
292
9.06k
          0.0,
293
9.06k
          -0.21105601049335784f,
294
9.06k
          0.18567180916109802f,
295
9.06k
          0.0,
296
9.06k
          0.0,
297
9.06k
          0.49215859013738733f,
298
9.06k
          -0.38525013709251915f,
299
9.06k
          0.21105601049335806f,
300
9.06k
          -0.49215859013738905f,
301
9.06k
          0.0,
302
9.06k
          0.17419412659916217f,
303
9.06k
          -0.18567180916109904f,
304
9.06k
          0.3852501370925211f,
305
9.06k
          -0.1741941265991621f,
306
9.06k
          0.0,
307
9.06k
      },
308
9.06k
      {
309
9.06k
          0.0,
310
9.06k
          0.0,
311
9.06k
          -0.14266084808807264f,
312
9.06k
          -0.3416446842253372f,
313
9.06k
          0.0,
314
9.06k
          0.7367497537172237f,
315
9.06k
          0.24627107722075148f,
316
9.06k
          -0.08574019035519306f,
317
9.06k
          -0.14266084808807344f,
318
9.06k
          0.24627107722075137f,
319
9.06k
          0.14883399227113567f,
320
9.06k
          -0.04768680350229251f,
321
9.06k
          -0.3416446842253373f,
322
9.06k
          -0.08574019035519267f,
323
9.06k
          -0.047686803502292804f,
324
9.06k
          -0.14266084808807242f,
325
9.06k
      },
326
9.06k
      {
327
9.06k
          0.0,
328
9.06k
          0.0,
329
9.06k
          -0.13813540350758585f,
330
9.06k
          0.3302282550303788f,
331
9.06k
          0.0,
332
9.06k
          0.08755115000587084f,
333
9.06k
          -0.07946706605909573f,
334
9.06k
          -0.4613374887461511f,
335
9.06k
          -0.13813540350758294f,
336
9.06k
          -0.07946706605910261f,
337
9.06k
          0.49724647109535086f,
338
9.06k
          0.12538059448563663f,
339
9.06k
          0.3302282550303805f,
340
9.06k
          -0.4613374887461554f,
341
9.06k
          0.12538059448564315f,
342
9.06k
          -0.13813540350758452f,
343
9.06k
      },
344
9.06k
      {
345
9.06k
          0.0,
346
9.06k
          0.0,
347
9.06k
          -0.17437602599651067f,
348
9.06k
          0.0702790691196284f,
349
9.06k
          0.0,
350
9.06k
          -0.2921026642334881f,
351
9.06k
          0.3623817333531167f,
352
9.06k
          0.0,
353
9.06k
          -0.1743760259965108f,
354
9.06k
          0.36238173335311646f,
355
9.06k
          0.29210266423348785f,
356
9.06k
          -0.4326608024727445f,
357
9.06k
          0.07027906911962818f,
358
9.06k
          0.0,
359
9.06k
          -0.4326608024727457f,
360
9.06k
          0.34875205199302267f,
361
9.06k
      },
362
9.06k
      {
363
9.06k
          0.0,
364
9.06k
          0.0,
365
9.06k
          0.11354987314994337f,
366
9.06k
          -0.07417504595810355f,
367
9.06k
          0.0,
368
9.06k
          0.19402893032594343f,
369
9.06k
          -0.435190496523228f,
370
9.06k
          0.21918684838857466f,
371
9.06k
          0.11354987314994257f,
372
9.06k
          -0.4351904965232251f,
373
9.06k
          0.5550443808910661f,
374
9.06k
          -0.25468277124066463f,
375
9.06k
          -0.07417504595810233f,
376
9.06k
          0.2191868483885728f,
377
9.06k
          -0.25468277124066413f,
378
9.06k
          0.1135498731499429f,
379
9.06k
      },
380
9.06k
  };
381
382
9.06k
  const HWY_CAPPED(float, 16) d;
383
154k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
384
144k
    auto pixel = Zero(d);
385
2.46M
    for (size_t j = 0; j < 16; j++) {
386
2.31M
      auto cf = Set(d, coeffs[j]);
387
2.31M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
388
2.31M
      pixel = MulAdd(cf, basis, pixel);
389
2.31M
    }
390
144k
    Store(pixel, d, pixels + i);
391
144k
  }
392
9.06k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
393
394
template <size_t afv_kind>
395
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
396
9.06k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
397
9.06k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
398
9.06k
  size_t afv_x = afv_kind & 1;
399
9.06k
  size_t afv_y = afv_kind / 2;
400
9.06k
  float dcs[3] = {};
401
9.06k
  float block00 = coefficients[0];
402
9.06k
  float block01 = coefficients[1];
403
9.06k
  float block10 = coefficients[8];
404
9.06k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
405
9.06k
  dcs[1] = (block00 + block10 - block01);
406
9.06k
  dcs[2] = block00 - block10;
407
  // IAFV: (even, even) positions.
408
9.06k
  HWY_ALIGN float coeff[4 * 4];
409
9.06k
  coeff[0] = dcs[0];
410
45.3k
  for (size_t iy = 0; iy < 4; iy++) {
411
181k
    for (size_t ix = 0; ix < 4; ix++) {
412
144k
      if (ix == 0 && iy == 0) continue;
413
135k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
414
135k
    }
415
36.2k
  }
416
9.06k
  HWY_ALIGN float block[4 * 8];
417
9.06k
  AFVIDCT4x4(coeff, block);
418
45.3k
  for (size_t iy = 0; iy < 4; iy++) {
419
181k
    for (size_t ix = 0; ix < 4; ix++) {
420
144k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
421
144k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
422
144k
    }
423
36.2k
  }
424
  // IDCT4x4 in (odd, even) positions.
425
9.06k
  block[0] = dcs[1];
426
45.3k
  for (size_t iy = 0; iy < 4; iy++) {
427
181k
    for (size_t ix = 0; ix < 4; ix++) {
428
144k
      if (ix == 0 && iy == 0) continue;
429
135k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
430
135k
    }
431
36.2k
  }
432
9.06k
  ComputeScaledIDCT<4, 4>()(
433
9.06k
      block,
434
9.06k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
435
9.06k
            pixels_stride),
436
9.06k
      scratch_space);
437
  // IDCT4x8.
438
9.06k
  block[0] = dcs[2];
439
45.3k
  for (size_t iy = 0; iy < 4; iy++) {
440
326k
    for (size_t ix = 0; ix < 8; ix++) {
441
289k
      if (ix == 0 && iy == 0) continue;
442
280k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
443
280k
    }
444
36.2k
  }
445
9.06k
  ComputeScaledIDCT<4, 8>()(
446
9.06k
      block,
447
9.06k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
448
9.06k
      scratch_space);
449
9.06k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
396
690
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
397
690
  HWY_ALIGN float scratch_space[4 * 8 * 4];
398
690
  size_t afv_x = afv_kind & 1;
399
690
  size_t afv_y = afv_kind / 2;
400
690
  float dcs[3] = {};
401
690
  float block00 = coefficients[0];
402
690
  float block01 = coefficients[1];
403
690
  float block10 = coefficients[8];
404
690
  dcs[0] = (block00 + block10 + block01) * 4.0f;
405
690
  dcs[1] = (block00 + block10 - block01);
406
690
  dcs[2] = block00 - block10;
407
  // IAFV: (even, even) positions.
408
690
  HWY_ALIGN float coeff[4 * 4];
409
690
  coeff[0] = dcs[0];
410
3.45k
  for (size_t iy = 0; iy < 4; iy++) {
411
13.8k
    for (size_t ix = 0; ix < 4; ix++) {
412
11.0k
      if (ix == 0 && iy == 0) continue;
413
10.3k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
414
10.3k
    }
415
2.76k
  }
416
690
  HWY_ALIGN float block[4 * 8];
417
690
  AFVIDCT4x4(coeff, block);
418
3.45k
  for (size_t iy = 0; iy < 4; iy++) {
419
13.8k
    for (size_t ix = 0; ix < 4; ix++) {
420
11.0k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
421
11.0k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
422
11.0k
    }
423
2.76k
  }
424
  // IDCT4x4 in (odd, even) positions.
425
690
  block[0] = dcs[1];
426
3.45k
  for (size_t iy = 0; iy < 4; iy++) {
427
13.8k
    for (size_t ix = 0; ix < 4; ix++) {
428
11.0k
      if (ix == 0 && iy == 0) continue;
429
10.3k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
430
10.3k
    }
431
2.76k
  }
432
690
  ComputeScaledIDCT<4, 4>()(
433
690
      block,
434
690
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
435
690
            pixels_stride),
436
690
      scratch_space);
437
  // IDCT4x8.
438
690
  block[0] = dcs[2];
439
3.45k
  for (size_t iy = 0; iy < 4; iy++) {
440
24.8k
    for (size_t ix = 0; ix < 8; ix++) {
441
22.0k
      if (ix == 0 && iy == 0) continue;
442
21.3k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
443
21.3k
    }
444
2.76k
  }
445
690
  ComputeScaledIDCT<4, 8>()(
446
690
      block,
447
690
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
448
690
      scratch_space);
449
690
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
396
8.37k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
397
8.37k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
398
8.37k
  size_t afv_x = afv_kind & 1;
399
8.37k
  size_t afv_y = afv_kind / 2;
400
8.37k
  float dcs[3] = {};
401
8.37k
  float block00 = coefficients[0];
402
8.37k
  float block01 = coefficients[1];
403
8.37k
  float block10 = coefficients[8];
404
8.37k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
405
8.37k
  dcs[1] = (block00 + block10 - block01);
406
8.37k
  dcs[2] = block00 - block10;
407
  // IAFV: (even, even) positions.
408
8.37k
  HWY_ALIGN float coeff[4 * 4];
409
8.37k
  coeff[0] = dcs[0];
410
41.8k
  for (size_t iy = 0; iy < 4; iy++) {
411
167k
    for (size_t ix = 0; ix < 4; ix++) {
412
133k
      if (ix == 0 && iy == 0) continue;
413
125k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
414
125k
    }
415
33.4k
  }
416
8.37k
  HWY_ALIGN float block[4 * 8];
417
8.37k
  AFVIDCT4x4(coeff, block);
418
41.8k
  for (size_t iy = 0; iy < 4; iy++) {
419
167k
    for (size_t ix = 0; ix < 4; ix++) {
420
133k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
421
133k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
422
133k
    }
423
33.4k
  }
424
  // IDCT4x4 in (odd, even) positions.
425
8.37k
  block[0] = dcs[1];
426
41.8k
  for (size_t iy = 0; iy < 4; iy++) {
427
167k
    for (size_t ix = 0; ix < 4; ix++) {
428
133k
      if (ix == 0 && iy == 0) continue;
429
125k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
430
125k
    }
431
33.4k
  }
432
8.37k
  ComputeScaledIDCT<4, 4>()(
433
8.37k
      block,
434
8.37k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
435
8.37k
            pixels_stride),
436
8.37k
      scratch_space);
437
  // IDCT4x8.
438
8.37k
  block[0] = dcs[2];
439
41.8k
  for (size_t iy = 0; iy < 4; iy++) {
440
301k
    for (size_t ix = 0; ix < 8; ix++) {
441
267k
      if (ix == 0 && iy == 0) continue;
442
259k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
443
259k
    }
444
33.4k
  }
445
8.37k
  ComputeScaledIDCT<4, 8>()(
446
8.37k
      block,
447
8.37k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
448
8.37k
      scratch_space);
449
8.37k
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
450
451
// Converts the coefficients of one block to pixels using the inverse
// transform selected by `strategy`.
//
// strategy:      which transform the coefficients were coded with.
// coefficients:  input coefficients; the 8x8 strategies index them with a
//                row stride of 8.
// pixels:        destination, addressed as pixels[y * pixels_stride + x].
// pixels_stride: destination row stride in floats.
// scratch_space: temporary storage handed to ComputeScaledIDCT; not used by
//                the IDENTITY, DCT2X2 and AFV cases.
//                NOTE(review): the required size is not visible here - check
//                the callers / ComputeScaledIDCT.
//
// A strategy value without a matching case leaves `pixels` untouched.
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
                                        float* JXL_RESTRICT coefficients,
                                        float* JXL_RESTRICT pixels,
                                        size_t pixels_stride,
                                        float* scratch_space) {
  using Type = AcStrategyType;
  switch (strategy) {
    case Type::IDENTITY: {
      // Four 4x4 quadrants stored interleaved; un-mix their DC terms first.
      const float c00 = coefficients[0];
      const float c01 = coefficients[1];
      const float c10 = coefficients[8];
      const float c11 = coefficients[9];
      const float quadrant_dc[4] = {
          c00 + c01 + c10 + c11,
          c00 + c01 - c10 - c11,
          c00 - c01 + c10 - c11,
          c00 - c01 - c10 + c11,
      };
      for (size_t y = 0; y < 2; ++y) {
        for (size_t x = 0; x < 2; ++x) {
          // Entry (iy, ix) of this quadrant lives at src[iy * 16 + ix * 2].
          const float* src = coefficients + y * 8 + x;
          float* dst = pixels + y * 4 * pixels_stride + x * 4;
          // Pixel (1, 1) of the quadrant is the reference value every other
          // pixel is expressed relative to.
          float* anchor = dst + pixels_stride + 1;

          float residual_sum = 0;
          for (size_t k = 1; k < 4 * 4; ++k) {
            residual_sum += src[(k / 4) * 16 + (k % 4) * 2];
          }
          *anchor = quadrant_dc[y * 2 + x] - residual_sum * (1.0f / 16);

          for (size_t iy = 0; iy < 4; ++iy) {
            for (size_t ix = 0; ix < 4; ++ix) {
              if (iy == 1 && ix == 1) continue;
              dst[iy * pixels_stride + ix] = src[iy * 16 + ix * 2] + *anchor;
            }
          }
          // Slot (0, 0) held the mixed DC, so the residual for pixel (0, 0)
          // is stored in slot (1, 1) instead; overwrite with the real value.
          dst[0] = src[16 + 2] + *anchor;
        }
      }
      break;
    }
    case Type::DCT8X4: {
      // Two sub-blocks with interleaved rows; each is placed 4 columns apart.
      const float c0 = coefficients[0];
      const float c1 = coefficients[8];
      const float half_dc[2] = {c0 + c1, c0 - c1};
      for (size_t x = 0; x < 2; ++x) {
        HWY_ALIGN float block[4 * 8];
        block[0] = half_dc[x];
        for (size_t k = 1; k < 4 * 8; ++k) {
          block[k] = coefficients[(x + (k / 8) * 2) * 8 + (k % 8)];
        }
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
                                  scratch_space);
      }
      break;
    }
    case Type::DCT4X8: {
      // Two sub-blocks with interleaved rows; each is placed 4 rows apart.
      const float c0 = coefficients[0];
      const float c1 = coefficients[8];
      const float half_dc[2] = {c0 + c1, c0 - c1};
      for (size_t y = 0; y < 2; ++y) {
        HWY_ALIGN float block[4 * 8];
        block[0] = half_dc[y];
        for (size_t k = 1; k < 4 * 8; ++k) {
          block[k] = coefficients[(y + (k / 8) * 2) * 8 + (k % 8)];
        }
        ComputeScaledIDCT<4, 8>()(
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
            scratch_space);
      }
      break;
    }
    case Type::DCT4X4: {
      // Four 4x4 sub-blocks interleaved in both directions.
      const float c00 = coefficients[0];
      const float c01 = coefficients[1];
      const float c10 = coefficients[8];
      const float c11 = coefficients[9];
      const float quadrant_dc[4] = {
          c00 + c01 + c10 + c11,
          c00 + c01 - c10 - c11,
          c00 - c01 + c10 - c11,
          c00 - c01 - c10 + c11,
      };
      for (size_t y = 0; y < 2; ++y) {
        for (size_t x = 0; x < 2; ++x) {
          HWY_ALIGN float block[4 * 4];
          block[0] = quadrant_dc[y * 2 + x];
          for (size_t k = 1; k < 4 * 4; ++k) {
            block[k] = coefficients[(y + (k / 4) * 2) * 8 + x + (k % 4) * 2];
          }
          ComputeScaledIDCT<4, 4>()(
              block,
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
              scratch_space);
        }
      }
      break;
    }
    case Type::DCT2X2: {
      // Work on a private copy, applying IDCT2TopBlock in place at sizes
      // 2, 4 and 8, then copy the result out row by row.
      HWY_ALIGN float coeffs[kDCTBlockSize];
      memcpy(coeffs, coefficients, sizeof(coeffs));
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
      for (size_t y = 0; y < kBlockDim; ++y) {
        const float* src_row = coeffs + y * kBlockDim;
        float* dst_row = pixels + y * pixels_stride;
        for (size_t x = 0; x < kBlockDim; ++x) {
          dst_row[x] = src_row[x];
        }
      }
      break;
    }
    // The remaining DCT strategies are a single scaled IDCT of the matching
    // size over the whole block.
    case Type::DCT16X16: {
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT16X8: {
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
                                 scratch_space);
      break;
    }
    case Type::DCT8X16: {
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
                                 scratch_space);
      break;
    }
    case Type::DCT32X8: {
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
                                 scratch_space);
      break;
    }
    case Type::DCT8X32: {
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
                                 scratch_space);
      break;
    }
    case Type::DCT32X16: {
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT16X32: {
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT32X32: {
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT: {
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
                                scratch_space);
      break;
    }
    // AFV variants use their own stack scratch buffer, so scratch_space is
    // not forwarded.
    case Type::AFV0: {
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
      break;
    }
    case Type::AFV1: {
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
      break;
    }
    case Type::AFV2: {
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
      break;
    }
    case Type::AFV3: {
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
      break;
    }
    case Type::DCT64X32: {
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT32X64: {
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT64X64: {
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
                                  scratch_space);
      break;
    }
    case Type::DCT128X64: {
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
                                   scratch_space);
      break;
    }
    case Type::DCT64X128: {
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
                                   scratch_space);
      break;
    }
    case Type::DCT128X128: {
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
                                    scratch_space);
      break;
    }
    case Type::DCT256X128: {
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
                                    scratch_space);
      break;
    }
    case Type::DCT128X256: {
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
                                    scratch_space);
      break;
    }
    case Type::DCT256X256: {
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
                                    scratch_space);
      break;
    }
  }
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
455
99.8k
                                        float* scratch_space) {
456
99.8k
  using Type = AcStrategyType;
457
99.8k
  switch (strategy) {
458
24.3k
    case Type::IDENTITY: {
459
24.3k
      float dcs[4] = {};
460
24.3k
      float block00 = coefficients[0];
461
24.3k
      float block01 = coefficients[1];
462
24.3k
      float block10 = coefficients[8];
463
24.3k
      float block11 = coefficients[9];
464
24.3k
      dcs[0] = block00 + block01 + block10 + block11;
465
24.3k
      dcs[1] = block00 + block01 - block10 - block11;
466
24.3k
      dcs[2] = block00 - block01 + block10 - block11;
467
24.3k
      dcs[3] = block00 - block01 - block10 + block11;
468
73.1k
      for (size_t y = 0; y < 2; y++) {
469
146k
        for (size_t x = 0; x < 2; x++) {
470
97.4k
          float block_dc = dcs[y * 2 + x];
471
97.4k
          float residual_sum = 0;
472
487k
          for (size_t iy = 0; iy < 4; iy++) {
473
1.94M
            for (size_t ix = 0; ix < 4; ix++) {
474
1.55M
              if (ix == 0 && iy == 0) continue;
475
1.46M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
476
1.46M
            }
477
389k
          }
478
97.4k
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
479
97.4k
              block_dc - residual_sum * (1.0f / 16);
480
487k
          for (size_t iy = 0; iy < 4; iy++) {
481
1.94M
            for (size_t ix = 0; ix < 4; ix++) {
482
1.55M
              if (ix == 1 && iy == 1) continue;
483
1.46M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
484
1.46M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
485
1.46M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
486
1.46M
            }
487
389k
          }
488
97.4k
          pixels[y * 4 * pixels_stride + x * 4] =
489
97.4k
              coefficients[(y + 2) * 8 + x + 2] +
490
97.4k
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
97.4k
        }
492
48.7k
      }
493
24.3k
      break;
494
0
    }
495
0
    case Type::DCT8X4: {
496
0
      float dcs[2] = {};
497
0
      float block0 = coefficients[0];
498
0
      float block1 = coefficients[8];
499
0
      dcs[0] = block0 + block1;
500
0
      dcs[1] = block0 - block1;
501
0
      for (size_t x = 0; x < 2; x++) {
502
0
        HWY_ALIGN float block[4 * 8];
503
0
        block[0] = dcs[x];
504
0
        for (size_t iy = 0; iy < 4; iy++) {
505
0
          for (size_t ix = 0; ix < 8; ix++) {
506
0
            if (ix == 0 && iy == 0) continue;
507
0
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
508
0
          }
509
0
        }
510
0
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
511
0
                                  scratch_space);
512
0
      }
513
0
      break;
514
0
    }
515
0
    case Type::DCT4X8: {
516
0
      float dcs[2] = {};
517
0
      float block0 = coefficients[0];
518
0
      float block1 = coefficients[8];
519
0
      dcs[0] = block0 + block1;
520
0
      dcs[1] = block0 - block1;
521
0
      for (size_t y = 0; y < 2; y++) {
522
0
        HWY_ALIGN float block[4 * 8];
523
0
        block[0] = dcs[y];
524
0
        for (size_t iy = 0; iy < 4; iy++) {
525
0
          for (size_t ix = 0; ix < 8; ix++) {
526
0
            if (ix == 0 && iy == 0) continue;
527
0
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
528
0
          }
529
0
        }
530
0
        ComputeScaledIDCT<4, 8>()(
531
0
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
532
0
            scratch_space);
533
0
      }
534
0
      break;
535
0
    }
536
12
    case Type::DCT4X4: {
537
12
      float dcs[4] = {};
538
12
      float block00 = coefficients[0];
539
12
      float block01 = coefficients[1];
540
12
      float block10 = coefficients[8];
541
12
      float block11 = coefficients[9];
542
12
      dcs[0] = block00 + block01 + block10 + block11;
543
12
      dcs[1] = block00 + block01 - block10 - block11;
544
12
      dcs[2] = block00 - block01 + block10 - block11;
545
12
      dcs[3] = block00 - block01 - block10 + block11;
546
36
      for (size_t y = 0; y < 2; y++) {
547
72
        for (size_t x = 0; x < 2; x++) {
548
48
          HWY_ALIGN float block[4 * 4];
549
48
          block[0] = dcs[y * 2 + x];
550
240
          for (size_t iy = 0; iy < 4; iy++) {
551
960
            for (size_t ix = 0; ix < 4; ix++) {
552
768
              if (ix == 0 && iy == 0) continue;
553
720
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
554
720
            }
555
192
          }
556
48
          ComputeScaledIDCT<4, 4>()(
557
48
              block,
558
48
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
559
48
              scratch_space);
560
48
        }
561
24
      }
562
12
      break;
563
0
    }
564
96
    case Type::DCT2X2: {
565
96
      HWY_ALIGN float coeffs[kDCTBlockSize];
566
96
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
567
96
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
568
96
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
569
96
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
570
864
      for (size_t y = 0; y < kBlockDim; y++) {
571
6.91k
        for (size_t x = 0; x < kBlockDim; x++) {
572
6.14k
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
573
6.14k
        }
574
768
      }
575
96
      break;
576
0
    }
577
0
    case Type::DCT16X16: {
578
0
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
579
0
                                  scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT16X8: {
583
0
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
584
0
                                 scratch_space);
585
0
      break;
586
0
    }
587
45.3k
    case Type::DCT8X16: {
588
45.3k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
589
45.3k
                                 scratch_space);
590
45.3k
      break;
591
0
    }
592
0
    case Type::DCT32X8: {
593
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
594
0
                                 scratch_space);
595
0
      break;
596
0
    }
597
0
    case Type::DCT8X32: {
598
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT32X16: {
603
0
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                  scratch_space);
605
0
      break;
606
0
    }
607
0
    case Type::DCT16X32: {
608
0
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
609
0
                                  scratch_space);
610
0
      break;
611
0
    }
612
0
    case Type::DCT32X32: {
613
0
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
0
                                  scratch_space);
615
0
      break;
616
0
    }
617
20.9k
    case Type::DCT: {
618
20.9k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
619
20.9k
                                scratch_space);
620
20.9k
      break;
621
0
    }
622
0
    case Type::AFV0: {
623
0
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
624
0
      break;
625
0
    }
626
0
    case Type::AFV1: {
627
0
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
628
0
      break;
629
0
    }
630
690
    case Type::AFV2: {
631
690
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
632
690
      break;
633
0
    }
634
8.37k
    case Type::AFV3: {
635
8.37k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
636
8.37k
      break;
637
0
    }
638
0
    case Type::DCT64X32: {
639
0
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
640
0
                                  scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT32X64: {
644
0
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X64: {
649
0
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT128X64: {
654
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT64X128: {
659
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X128: {
664
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                    scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT256X128: {
669
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT128X256: {
674
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT256X256: {
679
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
99.8k
  }
684
99.8k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
685
686
// Computes the lowest-frequency coefficients for `strategy` from the values in
// `dc` and writes them to `llf`.
//
// Strategies larger than a single kBlockDim x kBlockDim block forward to
// ReinterpretingDCT, which runs a ROWS x COLS DCT over `dc` (read with row
// stride `dc_stride`); ROWS x COLS is the strategy size expressed in units of
// kBlockDim. The output stride passed along with `llf` is
// max(ROWS, COLS) * kBlockDim in every case below.
//
// Temporary storage: strategies up to 32X32 (at most 4x4 inputs) use the
// stack buffers declared in this function and never touch `scratch`; larger
// strategies use the caller-provided `scratch`, with its first ROWS * COLS
// floats as the block buffer and the remainder as DCT scratch space.
// NOTE(review): the minimum size required of `scratch` depends on
// ReinterpretingDCT / ComputeScaledDCT and is not visible here -- confirm
// against the callers.
//
// All remaining (single-block) strategies simply copy dc[0] to llf[0].
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
687
                                              const float* dc, size_t dc_stride,
688
                                              float* llf,
689
99.8k
                                              float* JXL_RESTRICT scratch) {
690
99.8k
  using Type = AcStrategyType;
691
99.8k
  // Stack-local block and scratch buffers used by the cases up to DCT32X32.
  HWY_ALIGN float warm_block[4 * 4];
692
99.8k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
693
99.8k
  switch (strategy) {
694
0
    case Type::DCT16X8: {
695
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
696
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
697
0
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
698
0
      break;
699
0
    }
700
45.3k
    case Type::DCT8X16: {
701
45.3k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
702
45.3k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
703
45.3k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
704
45.3k
      break;
705
0
    }
706
0
    case Type::DCT16X16: {
707
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
708
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
709
0
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
710
0
      break;
711
0
    }
712
0
    case Type::DCT32X8: {
713
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
714
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
715
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
716
0
      break;
717
0
    }
718
0
    case Type::DCT8X32: {
719
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
720
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
721
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
722
0
      break;
723
0
    }
724
0
    case Type::DCT32X16: {
725
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
726
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
727
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
728
0
      break;
729
0
    }
730
0
    case Type::DCT16X32: {
731
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
732
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
733
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
734
0
      break;
735
0
    }
736
0
    case Type::DCT32X32: {
737
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
738
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
739
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
740
0
      break;
741
0
    }
742
0
    // From here on the inputs exceed 4x4, so the caller-provided `scratch` is
    // used instead of the stack buffers: the first ROWS * COLS floats hold the
    // block, and the pointer just past them is handed over as scratch space.
    case Type::DCT64X32: {
743
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
744
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
745
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
746
0
      break;
747
0
    }
748
0
    case Type::DCT32X64: {
749
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
750
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
751
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
752
0
      break;
753
0
    }
754
0
    case Type::DCT64X64: {
755
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
756
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
757
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
758
0
      break;
759
0
    }
760
0
    case Type::DCT128X64: {
761
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
762
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
763
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
764
0
      break;
765
0
    }
766
0
    case Type::DCT64X128: {
767
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
769
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X128: {
773
0
      ReinterpretingDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
776
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X128: {
780
0
      ReinterpretingDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
783
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
784
0
      break;
785
0
    }
786
0
    case Type::DCT128X256: {
787
0
      ReinterpretingDCT<
788
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
789
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
790
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
791
0
      break;
792
0
    }
793
0
    case Type::DCT256X256: {
794
0
      ReinterpretingDCT<
795
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
796
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
797
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
798
0
      break;
799
0
    }
800
20.9k
    // Every remaining strategy falls through to a single copy: the one input
    // value dc[0] becomes the one output value llf[0].
    case Type::DCT:
801
21.0k
    case Type::DCT2X2:
802
21.0k
    case Type::DCT4X4:
803
21.0k
    case Type::DCT4X8:
804
21.0k
    case Type::DCT8X4:
805
21.0k
    case Type::AFV0:
806
21.0k
    case Type::AFV1:
807
21.7k
    case Type::AFV2:
808
30.1k
    case Type::AFV3:
809
54.4k
    case Type::IDENTITY:
810
54.4k
      llf[0] = dc[0];
811
54.4k
      break;
812
99.8k
  };
813
99.8k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
689
99.8k
                                              float* JXL_RESTRICT scratch) {
690
99.8k
  using Type = AcStrategyType;
691
99.8k
  HWY_ALIGN float warm_block[4 * 4];
692
99.8k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
693
99.8k
  switch (strategy) {
694
0
    case Type::DCT16X8: {
695
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
696
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
697
0
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
698
0
      break;
699
0
    }
700
45.3k
    case Type::DCT8X16: {
701
45.3k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
702
45.3k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
703
45.3k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
704
45.3k
      break;
705
0
    }
706
0
    case Type::DCT16X16: {
707
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
708
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
709
0
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
710
0
      break;
711
0
    }
712
0
    case Type::DCT32X8: {
713
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
714
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
715
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
716
0
      break;
717
0
    }
718
0
    case Type::DCT8X32: {
719
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
720
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
721
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
722
0
      break;
723
0
    }
724
0
    case Type::DCT32X16: {
725
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
726
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
727
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
728
0
      break;
729
0
    }
730
0
    case Type::DCT16X32: {
731
0
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
732
0
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
733
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
734
0
      break;
735
0
    }
736
0
    case Type::DCT32X32: {
737
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
738
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
739
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
740
0
      break;
741
0
    }
742
0
    case Type::DCT64X32: {
743
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
744
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
745
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
746
0
      break;
747
0
    }
748
0
    case Type::DCT32X64: {
749
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
750
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
751
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
752
0
      break;
753
0
    }
754
0
    case Type::DCT64X64: {
755
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
756
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
757
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
758
0
      break;
759
0
    }
760
0
    case Type::DCT128X64: {
761
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
762
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
763
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
764
0
      break;
765
0
    }
766
0
    case Type::DCT64X128: {
767
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
769
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X128: {
773
0
      ReinterpretingDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
776
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X128: {
780
0
      ReinterpretingDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
783
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
784
0
      break;
785
0
    }
786
0
    case Type::DCT128X256: {
787
0
      ReinterpretingDCT<
788
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
789
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
790
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
791
0
      break;
792
0
    }
793
0
    case Type::DCT256X256: {
794
0
      ReinterpretingDCT<
795
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
796
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
797
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
798
0
      break;
799
0
    }
800
20.9k
    case Type::DCT:
801
21.0k
    case Type::DCT2X2:
802
21.0k
    case Type::DCT4X4:
803
21.0k
    case Type::DCT4X8:
804
21.0k
    case Type::DCT8X4:
805
21.0k
    case Type::AFV0:
806
21.0k
    case Type::AFV1:
807
21.7k
    case Type::AFV2:
808
30.1k
    case Type::AFV3:
809
54.4k
    case Type::IDENTITY:
810
54.4k
      llf[0] = dc[0];
811
54.4k
      break;
812
99.8k
  };
813
99.8k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
814
815
}  // namespace
816
// NOLINTNEXTLINE(google-readability-namespace-comments)
817
}  // namespace HWY_NAMESPACE
818
}  // namespace jxl
819
HWY_AFTER_NAMESPACE();
820
821
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_