Coverage Report

Created: 2026-04-01 07:24

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
62.8k
                                  float* JXL_RESTRICT scratch_space) {
41
62.8k
  static_assert(LF_ROWS == ROWS,
42
62.8k
                "ReinterpretingDCT should only be called with LF == N");
43
62.8k
  static_assert(LF_COLS == COLS,
44
62.8k
                "ReinterpretingDCT should only be called with LF == N");
45
62.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
62.8k
                                 scratch_space);
47
62.8k
  if (ROWS < COLS) {
48
121k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
182k
      for (size_t x = 0; x < LF_COLS; x++) {
50
122k
        output[y * output_stride + x] =
51
122k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
122k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
122k
      }
54
60.8k
    }
55
60.6k
  } else {
56
6.01k
    for (size_t y = 0; y < LF_COLS; y++) {
57
13.8k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
10.0k
        output[y * output_stride + x] =
59
10.0k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
10.0k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
10.0k
      }
62
3.86k
    }
63
2.15k
  }
64
62.8k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
522
                                  float* JXL_RESTRICT scratch_space) {
41
522
  static_assert(LF_ROWS == ROWS,
42
522
                "ReinterpretingDCT should only be called with LF == N");
43
522
  static_assert(LF_COLS == COLS,
44
522
                "ReinterpretingDCT should only be called with LF == N");
45
522
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
522
                                 scratch_space);
47
522
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
522
  } else {
56
1.04k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.56k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.04k
        output[y * output_stride + x] =
59
1.04k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.04k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.04k
      }
62
522
    }
63
522
  }
64
522
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
60.6k
                                  float* JXL_RESTRICT scratch_space) {
41
60.6k
  static_assert(LF_ROWS == ROWS,
42
60.6k
                "ReinterpretingDCT should only be called with LF == N");
43
60.6k
  static_assert(LF_COLS == COLS,
44
60.6k
                "ReinterpretingDCT should only be called with LF == N");
45
60.6k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
60.6k
                                 scratch_space);
47
60.6k
  if (ROWS < COLS) {
48
121k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
181k
      for (size_t x = 0; x < LF_COLS; x++) {
50
120k
        output[y * output_stride + x] =
51
120k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
120k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
120k
      }
54
60.5k
    }
55
60.5k
  } else {
56
100
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
100
  }
64
60.6k
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
1.10k
                                  float* JXL_RESTRICT scratch_space) {
41
1.10k
  static_assert(LF_ROWS == ROWS,
42
1.10k
                "ReinterpretingDCT should only be called with LF == N");
43
1.10k
  static_assert(LF_COLS == COLS,
44
1.10k
                "ReinterpretingDCT should only be called with LF == N");
45
1.10k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
1.10k
                                 scratch_space);
47
1.10k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
1.10k
  } else {
56
3.30k
    for (size_t y = 0; y < LF_COLS; y++) {
57
6.60k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.40k
        output[y * output_stride + x] =
59
4.40k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.40k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.40k
      }
62
2.20k
    }
63
1.10k
  }
64
1.10k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
294
                                  float* JXL_RESTRICT scratch_space) {
41
294
  static_assert(LF_ROWS == ROWS,
42
294
                "ReinterpretingDCT should only be called with LF == N");
43
294
  static_assert(LF_COLS == COLS,
44
294
                "ReinterpretingDCT should only be called with LF == N");
45
294
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
294
                                 scratch_space);
47
294
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
294
  } else {
56
882
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.94k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.35k
        output[y * output_stride + x] =
59
2.35k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.35k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.35k
      }
62
588
    }
63
294
  }
64
294
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
147
                                  float* JXL_RESTRICT scratch_space) {
41
147
  static_assert(LF_ROWS == ROWS,
42
147
                "ReinterpretingDCT should only be called with LF == N");
43
147
  static_assert(LF_COLS == COLS,
44
147
                "ReinterpretingDCT should only be called with LF == N");
45
147
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
147
                                 scratch_space);
47
147
  if (ROWS < COLS) {
48
441
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.47k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.17k
        output[y * output_stride + x] =
51
1.17k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.17k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.17k
      }
54
294
    }
55
147
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
147
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
138
                                  float* JXL_RESTRICT scratch_space) {
41
138
  static_assert(LF_ROWS == ROWS,
42
138
                "ReinterpretingDCT should only be called with LF == N");
43
138
  static_assert(LF_COLS == COLS,
44
138
                "ReinterpretingDCT should only be called with LF == N");
45
138
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
138
                                 scratch_space);
47
138
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
138
  } else {
56
690
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.76k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.20k
        output[y * output_stride + x] =
59
2.20k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.20k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.20k
      }
62
552
    }
63
138
  }
64
138
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
495
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
495
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
495
  static_assert(S % 2 == 0, "S should be even");
70
495
  float temp[kDCTBlockSize];
71
495
  constexpr size_t num_2x2 = S / 2;
72
1.65k
  for (size_t y = 0; y < num_2x2; y++) {
73
4.62k
    for (size_t x = 0; x < num_2x2; x++) {
74
3.46k
      float c00 = block[y * kBlockDim + x];
75
3.46k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
3.46k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
3.46k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
3.46k
      float r00 = c00 + c01 + c10 + c11;
79
3.46k
      float r01 = c00 + c01 - c10 - c11;
80
3.46k
      float r10 = c00 - c01 + c10 - c11;
81
3.46k
      float r11 = c00 - c01 - c10 + c11;
82
3.46k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
3.46k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
3.46k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
3.46k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
3.46k
    }
87
1.15k
  }
88
2.80k
  for (size_t y = 0; y < S; y++) {
89
16.1k
    for (size_t x = 0; x < S; x++) {
90
13.8k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
13.8k
    }
92
2.31k
  }
93
495
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
165
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
165
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
165
  static_assert(S % 2 == 0, "S should be even");
70
165
  float temp[kDCTBlockSize];
71
165
  constexpr size_t num_2x2 = S / 2;
72
330
  for (size_t y = 0; y < num_2x2; y++) {
73
330
    for (size_t x = 0; x < num_2x2; x++) {
74
165
      float c00 = block[y * kBlockDim + x];
75
165
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
165
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
165
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
165
      float r00 = c00 + c01 + c10 + c11;
79
165
      float r01 = c00 + c01 - c10 - c11;
80
165
      float r10 = c00 - c01 + c10 - c11;
81
165
      float r11 = c00 - c01 - c10 + c11;
82
165
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
165
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
165
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
165
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
165
    }
87
165
  }
88
495
  for (size_t y = 0; y < S; y++) {
89
990
    for (size_t x = 0; x < S; x++) {
90
660
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
660
    }
92
330
  }
93
165
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
165
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
165
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
165
  static_assert(S % 2 == 0, "S should be even");
70
165
  float temp[kDCTBlockSize];
71
165
  constexpr size_t num_2x2 = S / 2;
72
495
  for (size_t y = 0; y < num_2x2; y++) {
73
990
    for (size_t x = 0; x < num_2x2; x++) {
74
660
      float c00 = block[y * kBlockDim + x];
75
660
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
660
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
660
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
660
      float r00 = c00 + c01 + c10 + c11;
79
660
      float r01 = c00 + c01 - c10 - c11;
80
660
      float r10 = c00 - c01 + c10 - c11;
81
660
      float r11 = c00 - c01 - c10 + c11;
82
660
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
660
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
660
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
660
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
660
    }
87
330
  }
88
825
  for (size_t y = 0; y < S; y++) {
89
3.30k
    for (size_t x = 0; x < S; x++) {
90
2.64k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
2.64k
    }
92
660
  }
93
165
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
165
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
165
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
165
  static_assert(S % 2 == 0, "S should be even");
70
165
  float temp[kDCTBlockSize];
71
165
  constexpr size_t num_2x2 = S / 2;
72
825
  for (size_t y = 0; y < num_2x2; y++) {
73
3.30k
    for (size_t x = 0; x < num_2x2; x++) {
74
2.64k
      float c00 = block[y * kBlockDim + x];
75
2.64k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
2.64k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
2.64k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
2.64k
      float r00 = c00 + c01 + c10 + c11;
79
2.64k
      float r01 = c00 + c01 - c10 - c11;
80
2.64k
      float r10 = c00 - c01 + c10 - c11;
81
2.64k
      float r11 = c00 - c01 - c10 + c11;
82
2.64k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
2.64k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
2.64k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
2.64k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
2.64k
    }
87
660
  }
88
1.48k
  for (size_t y = 0; y < S; y++) {
89
11.8k
    for (size_t x = 0; x < S; x++) {
90
10.5k
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
10.5k
    }
92
1.32k
  }
93
165
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
8.76k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
8.76k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
8.76k
      {
98
8.76k
          0.25,
99
8.76k
          0.25,
100
8.76k
          0.25,
101
8.76k
          0.25,
102
8.76k
          0.25,
103
8.76k
          0.25,
104
8.76k
          0.25,
105
8.76k
          0.25,
106
8.76k
          0.25,
107
8.76k
          0.25,
108
8.76k
          0.25,
109
8.76k
          0.25,
110
8.76k
          0.25,
111
8.76k
          0.25,
112
8.76k
          0.25,
113
8.76k
          0.25,
114
8.76k
      },
115
8.76k
      {
116
8.76k
          0.876902929799142f,
117
8.76k
          0.2206518106944235f,
118
8.76k
          -0.10140050393753763f,
119
8.76k
          -0.1014005039375375f,
120
8.76k
          0.2206518106944236f,
121
8.76k
          -0.10140050393753777f,
122
8.76k
          -0.10140050393753772f,
123
8.76k
          -0.10140050393753763f,
124
8.76k
          -0.10140050393753758f,
125
8.76k
          -0.10140050393753769f,
126
8.76k
          -0.1014005039375375f,
127
8.76k
          -0.10140050393753768f,
128
8.76k
          -0.10140050393753768f,
129
8.76k
          -0.10140050393753759f,
130
8.76k
          -0.10140050393753763f,
131
8.76k
          -0.10140050393753741f,
132
8.76k
      },
133
8.76k
      {
134
8.76k
          0.0,
135
8.76k
          0.0,
136
8.76k
          0.40670075830260755f,
137
8.76k
          0.44444816619734445f,
138
8.76k
          0.0,
139
8.76k
          0.0,
140
8.76k
          0.19574399372042936f,
141
8.76k
          0.2929100136981264f,
142
8.76k
          -0.40670075830260716f,
143
8.76k
          -0.19574399372042872f,
144
8.76k
          0.0,
145
8.76k
          0.11379074460448091f,
146
8.76k
          -0.44444816619734384f,
147
8.76k
          -0.29291001369812636f,
148
8.76k
          -0.1137907446044814f,
149
8.76k
          0.0,
150
8.76k
      },
151
8.76k
      {
152
8.76k
          0.0,
153
8.76k
          0.0,
154
8.76k
          -0.21255748058288748f,
155
8.76k
          0.3085497062849767f,
156
8.76k
          0.0,
157
8.76k
          0.4706702258572536f,
158
8.76k
          -0.1621205195722993f,
159
8.76k
          0.0,
160
8.76k
          -0.21255748058287047f,
161
8.76k
          -0.16212051957228327f,
162
8.76k
          -0.47067022585725277f,
163
8.76k
          -0.1464291867126764f,
164
8.76k
          0.3085497062849487f,
165
8.76k
          0.0,
166
8.76k
          -0.14642918671266536f,
167
8.76k
          0.4251149611657548f,
168
8.76k
      },
169
8.76k
      {
170
8.76k
          0.0,
171
8.76k
          -0.7071067811865474f,
172
8.76k
          0.0,
173
8.76k
          0.0,
174
8.76k
          0.7071067811865476f,
175
8.76k
          0.0,
176
8.76k
          0.0,
177
8.76k
          0.0,
178
8.76k
          0.0,
179
8.76k
          0.0,
180
8.76k
          0.0,
181
8.76k
          0.0,
182
8.76k
          0.0,
183
8.76k
          0.0,
184
8.76k
          0.0,
185
8.76k
          0.0,
186
8.76k
      },
187
8.76k
      {
188
8.76k
          -0.4105377591765233f,
189
8.76k
          0.6235485373547691f,
190
8.76k
          -0.06435071657946274f,
191
8.76k
          -0.06435071657946266f,
192
8.76k
          0.6235485373547694f,
193
8.76k
          -0.06435071657946284f,
194
8.76k
          -0.0643507165794628f,
195
8.76k
          -0.06435071657946274f,
196
8.76k
          -0.06435071657946272f,
197
8.76k
          -0.06435071657946279f,
198
8.76k
          -0.06435071657946266f,
199
8.76k
          -0.06435071657946277f,
200
8.76k
          -0.06435071657946277f,
201
8.76k
          -0.06435071657946273f,
202
8.76k
          -0.06435071657946274f,
203
8.76k
          -0.0643507165794626f,
204
8.76k
      },
205
8.76k
      {
206
8.76k
          0.0,
207
8.76k
          0.0,
208
8.76k
          -0.4517556589999482f,
209
8.76k
          0.15854503551840063f,
210
8.76k
          0.0,
211
8.76k
          -0.04038515160822202f,
212
8.76k
          0.0074182263792423875f,
213
8.76k
          0.39351034269210167f,
214
8.76k
          -0.45175565899994635f,
215
8.76k
          0.007418226379244351f,
216
8.76k
          0.1107416575309343f,
217
8.76k
          0.08298163094882051f,
218
8.76k
          0.15854503551839705f,
219
8.76k
          0.3935103426921022f,
220
8.76k
          0.0829816309488214f,
221
8.76k
          -0.45175565899994796f,
222
8.76k
      },
223
8.76k
      {
224
8.76k
          0.0,
225
8.76k
          0.0,
226
8.76k
          -0.304684750724869f,
227
8.76k
          0.5112616136591823f,
228
8.76k
          0.0,
229
8.76k
          0.0,
230
8.76k
          -0.290480129728998f,
231
8.76k
          -0.06578701549142804f,
232
8.76k
          0.304684750724884f,
233
8.76k
          0.2904801297290076f,
234
8.76k
          0.0,
235
8.76k
          -0.23889773523344604f,
236
8.76k
          -0.5112616136592012f,
237
8.76k
          0.06578701549142545f,
238
8.76k
          0.23889773523345467f,
239
8.76k
          0.0,
240
8.76k
      },
241
8.76k
      {
242
8.76k
          0.0,
243
8.76k
          0.0,
244
8.76k
          0.3017929516615495f,
245
8.76k
          0.25792362796341184f,
246
8.76k
          0.0,
247
8.76k
          0.16272340142866204f,
248
8.76k
          0.09520022653475037f,
249
8.76k
          0.0,
250
8.76k
          0.3017929516615503f,
251
8.76k
          0.09520022653475055f,
252
8.76k
          -0.16272340142866173f,
253
8.76k
          -0.35312385449816297f,
254
8.76k
          0.25792362796341295f,
255
8.76k
          0.0,
256
8.76k
          -0.3531238544981624f,
257
8.76k
          -0.6035859033230976f,
258
8.76k
      },
259
8.76k
      {
260
8.76k
          0.0,
261
8.76k
          0.0,
262
8.76k
          0.40824829046386274f,
263
8.76k
          0.0,
264
8.76k
          0.0,
265
8.76k
          0.0,
266
8.76k
          0.0,
267
8.76k
          -0.4082482904638628f,
268
8.76k
          -0.4082482904638635f,
269
8.76k
          0.0,
270
8.76k
          0.0,
271
8.76k
          -0.40824829046386296f,
272
8.76k
          0.0,
273
8.76k
          0.4082482904638634f,
274
8.76k
          0.408248290463863f,
275
8.76k
          0.0,
276
8.76k
      },
277
8.76k
      {
278
8.76k
          0.0,
279
8.76k
          0.0,
280
8.76k
          0.1747866975480809f,
281
8.76k
          0.0812611176717539f,
282
8.76k
          0.0,
283
8.76k
          0.0,
284
8.76k
          -0.3675398009862027f,
285
8.76k
          -0.307882213957909f,
286
8.76k
          -0.17478669754808135f,
287
8.76k
          0.3675398009862011f,
288
8.76k
          0.0,
289
8.76k
          0.4826689115059883f,
290
8.76k
          -0.08126111767175039f,
291
8.76k
          0.30788221395790305f,
292
8.76k
          -0.48266891150598584f,
293
8.76k
          0.0,
294
8.76k
      },
295
8.76k
      {
296
8.76k
          0.0,
297
8.76k
          0.0,
298
8.76k
          -0.21105601049335784f,
299
8.76k
          0.18567180916109802f,
300
8.76k
          0.0,
301
8.76k
          0.0,
302
8.76k
          0.49215859013738733f,
303
8.76k
          -0.38525013709251915f,
304
8.76k
          0.21105601049335806f,
305
8.76k
          -0.49215859013738905f,
306
8.76k
          0.0,
307
8.76k
          0.17419412659916217f,
308
8.76k
          -0.18567180916109904f,
309
8.76k
          0.3852501370925211f,
310
8.76k
          -0.1741941265991621f,
311
8.76k
          0.0,
312
8.76k
      },
313
8.76k
      {
314
8.76k
          0.0,
315
8.76k
          0.0,
316
8.76k
          -0.14266084808807264f,
317
8.76k
          -0.3416446842253372f,
318
8.76k
          0.0,
319
8.76k
          0.7367497537172237f,
320
8.76k
          0.24627107722075148f,
321
8.76k
          -0.08574019035519306f,
322
8.76k
          -0.14266084808807344f,
323
8.76k
          0.24627107722075137f,
324
8.76k
          0.14883399227113567f,
325
8.76k
          -0.04768680350229251f,
326
8.76k
          -0.3416446842253373f,
327
8.76k
          -0.08574019035519267f,
328
8.76k
          -0.047686803502292804f,
329
8.76k
          -0.14266084808807242f,
330
8.76k
      },
331
8.76k
      {
332
8.76k
          0.0,
333
8.76k
          0.0,
334
8.76k
          -0.13813540350758585f,
335
8.76k
          0.3302282550303788f,
336
8.76k
          0.0,
337
8.76k
          0.08755115000587084f,
338
8.76k
          -0.07946706605909573f,
339
8.76k
          -0.4613374887461511f,
340
8.76k
          -0.13813540350758294f,
341
8.76k
          -0.07946706605910261f,
342
8.76k
          0.49724647109535086f,
343
8.76k
          0.12538059448563663f,
344
8.76k
          0.3302282550303805f,
345
8.76k
          -0.4613374887461554f,
346
8.76k
          0.12538059448564315f,
347
8.76k
          -0.13813540350758452f,
348
8.76k
      },
349
8.76k
      {
350
8.76k
          0.0,
351
8.76k
          0.0,
352
8.76k
          -0.17437602599651067f,
353
8.76k
          0.0702790691196284f,
354
8.76k
          0.0,
355
8.76k
          -0.2921026642334881f,
356
8.76k
          0.3623817333531167f,
357
8.76k
          0.0,
358
8.76k
          -0.1743760259965108f,
359
8.76k
          0.36238173335311646f,
360
8.76k
          0.29210266423348785f,
361
8.76k
          -0.4326608024727445f,
362
8.76k
          0.07027906911962818f,
363
8.76k
          0.0,
364
8.76k
          -0.4326608024727457f,
365
8.76k
          0.34875205199302267f,
366
8.76k
      },
367
8.76k
      {
368
8.76k
          0.0,
369
8.76k
          0.0,
370
8.76k
          0.11354987314994337f,
371
8.76k
          -0.07417504595810355f,
372
8.76k
          0.0,
373
8.76k
          0.19402893032594343f,
374
8.76k
          -0.435190496523228f,
375
8.76k
          0.21918684838857466f,
376
8.76k
          0.11354987314994257f,
377
8.76k
          -0.4351904965232251f,
378
8.76k
          0.5550443808910661f,
379
8.76k
          -0.25468277124066463f,
380
8.76k
          -0.07417504595810233f,
381
8.76k
          0.2191868483885728f,
382
8.76k
          -0.25468277124066413f,
383
8.76k
          0.1135498731499429f,
384
8.76k
      },
385
8.76k
  };
386
387
8.76k
  const HWY_CAPPED(float, 16) d;
388
148k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
140k
    auto pixel = Zero(d);
390
2.38M
    for (size_t j = 0; j < 16; j++) {
391
2.24M
      auto cf = Set(d, coeffs[j]);
392
2.24M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.24M
      pixel = MulAdd(cf, basis, pixel);
394
2.24M
    }
395
140k
    Store(pixel, d, pixels + i);
396
140k
  }
397
8.76k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
8.76k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
8.76k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
8.76k
      {
98
8.76k
          0.25,
99
8.76k
          0.25,
100
8.76k
          0.25,
101
8.76k
          0.25,
102
8.76k
          0.25,
103
8.76k
          0.25,
104
8.76k
          0.25,
105
8.76k
          0.25,
106
8.76k
          0.25,
107
8.76k
          0.25,
108
8.76k
          0.25,
109
8.76k
          0.25,
110
8.76k
          0.25,
111
8.76k
          0.25,
112
8.76k
          0.25,
113
8.76k
          0.25,
114
8.76k
      },
115
8.76k
      {
116
8.76k
          0.876902929799142f,
117
8.76k
          0.2206518106944235f,
118
8.76k
          -0.10140050393753763f,
119
8.76k
          -0.1014005039375375f,
120
8.76k
          0.2206518106944236f,
121
8.76k
          -0.10140050393753777f,
122
8.76k
          -0.10140050393753772f,
123
8.76k
          -0.10140050393753763f,
124
8.76k
          -0.10140050393753758f,
125
8.76k
          -0.10140050393753769f,
126
8.76k
          -0.1014005039375375f,
127
8.76k
          -0.10140050393753768f,
128
8.76k
          -0.10140050393753768f,
129
8.76k
          -0.10140050393753759f,
130
8.76k
          -0.10140050393753763f,
131
8.76k
          -0.10140050393753741f,
132
8.76k
      },
133
8.76k
      {
134
8.76k
          0.0,
135
8.76k
          0.0,
136
8.76k
          0.40670075830260755f,
137
8.76k
          0.44444816619734445f,
138
8.76k
          0.0,
139
8.76k
          0.0,
140
8.76k
          0.19574399372042936f,
141
8.76k
          0.2929100136981264f,
142
8.76k
          -0.40670075830260716f,
143
8.76k
          -0.19574399372042872f,
144
8.76k
          0.0,
145
8.76k
          0.11379074460448091f,
146
8.76k
          -0.44444816619734384f,
147
8.76k
          -0.29291001369812636f,
148
8.76k
          -0.1137907446044814f,
149
8.76k
          0.0,
150
8.76k
      },
151
8.76k
      {
152
8.76k
          0.0,
153
8.76k
          0.0,
154
8.76k
          -0.21255748058288748f,
155
8.76k
          0.3085497062849767f,
156
8.76k
          0.0,
157
8.76k
          0.4706702258572536f,
158
8.76k
          -0.1621205195722993f,
159
8.76k
          0.0,
160
8.76k
          -0.21255748058287047f,
161
8.76k
          -0.16212051957228327f,
162
8.76k
          -0.47067022585725277f,
163
8.76k
          -0.1464291867126764f,
164
8.76k
          0.3085497062849487f,
165
8.76k
          0.0,
166
8.76k
          -0.14642918671266536f,
167
8.76k
          0.4251149611657548f,
168
8.76k
      },
169
8.76k
      {
170
8.76k
          0.0,
171
8.76k
          -0.7071067811865474f,
172
8.76k
          0.0,
173
8.76k
          0.0,
174
8.76k
          0.7071067811865476f,
175
8.76k
          0.0,
176
8.76k
          0.0,
177
8.76k
          0.0,
178
8.76k
          0.0,
179
8.76k
          0.0,
180
8.76k
          0.0,
181
8.76k
          0.0,
182
8.76k
          0.0,
183
8.76k
          0.0,
184
8.76k
          0.0,
185
8.76k
          0.0,
186
8.76k
      },
187
8.76k
      {
188
8.76k
          -0.4105377591765233f,
189
8.76k
          0.6235485373547691f,
190
8.76k
          -0.06435071657946274f,
191
8.76k
          -0.06435071657946266f,
192
8.76k
          0.6235485373547694f,
193
8.76k
          -0.06435071657946284f,
194
8.76k
          -0.0643507165794628f,
195
8.76k
          -0.06435071657946274f,
196
8.76k
          -0.06435071657946272f,
197
8.76k
          -0.06435071657946279f,
198
8.76k
          -0.06435071657946266f,
199
8.76k
          -0.06435071657946277f,
200
8.76k
          -0.06435071657946277f,
201
8.76k
          -0.06435071657946273f,
202
8.76k
          -0.06435071657946274f,
203
8.76k
          -0.0643507165794626f,
204
8.76k
      },
205
8.76k
      {
206
8.76k
          0.0,
207
8.76k
          0.0,
208
8.76k
          -0.4517556589999482f,
209
8.76k
          0.15854503551840063f,
210
8.76k
          0.0,
211
8.76k
          -0.04038515160822202f,
212
8.76k
          0.0074182263792423875f,
213
8.76k
          0.39351034269210167f,
214
8.76k
          -0.45175565899994635f,
215
8.76k
          0.007418226379244351f,
216
8.76k
          0.1107416575309343f,
217
8.76k
          0.08298163094882051f,
218
8.76k
          0.15854503551839705f,
219
8.76k
          0.3935103426921022f,
220
8.76k
          0.0829816309488214f,
221
8.76k
          -0.45175565899994796f,
222
8.76k
      },
223
8.76k
      {
224
8.76k
          0.0,
225
8.76k
          0.0,
226
8.76k
          -0.304684750724869f,
227
8.76k
          0.5112616136591823f,
228
8.76k
          0.0,
229
8.76k
          0.0,
230
8.76k
          -0.290480129728998f,
231
8.76k
          -0.06578701549142804f,
232
8.76k
          0.304684750724884f,
233
8.76k
          0.2904801297290076f,
234
8.76k
          0.0,
235
8.76k
          -0.23889773523344604f,
236
8.76k
          -0.5112616136592012f,
237
8.76k
          0.06578701549142545f,
238
8.76k
          0.23889773523345467f,
239
8.76k
          0.0,
240
8.76k
      },
241
8.76k
      {
242
8.76k
          0.0,
243
8.76k
          0.0,
244
8.76k
          0.3017929516615495f,
245
8.76k
          0.25792362796341184f,
246
8.76k
          0.0,
247
8.76k
          0.16272340142866204f,
248
8.76k
          0.09520022653475037f,
249
8.76k
          0.0,
250
8.76k
          0.3017929516615503f,
251
8.76k
          0.09520022653475055f,
252
8.76k
          -0.16272340142866173f,
253
8.76k
          -0.35312385449816297f,
254
8.76k
          0.25792362796341295f,
255
8.76k
          0.0,
256
8.76k
          -0.3531238544981624f,
257
8.76k
          -0.6035859033230976f,
258
8.76k
      },
259
8.76k
      {
260
8.76k
          0.0,
261
8.76k
          0.0,
262
8.76k
          0.40824829046386274f,
263
8.76k
          0.0,
264
8.76k
          0.0,
265
8.76k
          0.0,
266
8.76k
          0.0,
267
8.76k
          -0.4082482904638628f,
268
8.76k
          -0.4082482904638635f,
269
8.76k
          0.0,
270
8.76k
          0.0,
271
8.76k
          -0.40824829046386296f,
272
8.76k
          0.0,
273
8.76k
          0.4082482904638634f,
274
8.76k
          0.408248290463863f,
275
8.76k
          0.0,
276
8.76k
      },
277
8.76k
      {
278
8.76k
          0.0,
279
8.76k
          0.0,
280
8.76k
          0.1747866975480809f,
281
8.76k
          0.0812611176717539f,
282
8.76k
          0.0,
283
8.76k
          0.0,
284
8.76k
          -0.3675398009862027f,
285
8.76k
          -0.307882213957909f,
286
8.76k
          -0.17478669754808135f,
287
8.76k
          0.3675398009862011f,
288
8.76k
          0.0,
289
8.76k
          0.4826689115059883f,
290
8.76k
          -0.08126111767175039f,
291
8.76k
          0.30788221395790305f,
292
8.76k
          -0.48266891150598584f,
293
8.76k
          0.0,
294
8.76k
      },
295
8.76k
      {
296
8.76k
          0.0,
297
8.76k
          0.0,
298
8.76k
          -0.21105601049335784f,
299
8.76k
          0.18567180916109802f,
300
8.76k
          0.0,
301
8.76k
          0.0,
302
8.76k
          0.49215859013738733f,
303
8.76k
          -0.38525013709251915f,
304
8.76k
          0.21105601049335806f,
305
8.76k
          -0.49215859013738905f,
306
8.76k
          0.0,
307
8.76k
          0.17419412659916217f,
308
8.76k
          -0.18567180916109904f,
309
8.76k
          0.3852501370925211f,
310
8.76k
          -0.1741941265991621f,
311
8.76k
          0.0,
312
8.76k
      },
313
8.76k
      {
314
8.76k
          0.0,
315
8.76k
          0.0,
316
8.76k
          -0.14266084808807264f,
317
8.76k
          -0.3416446842253372f,
318
8.76k
          0.0,
319
8.76k
          0.7367497537172237f,
320
8.76k
          0.24627107722075148f,
321
8.76k
          -0.08574019035519306f,
322
8.76k
          -0.14266084808807344f,
323
8.76k
          0.24627107722075137f,
324
8.76k
          0.14883399227113567f,
325
8.76k
          -0.04768680350229251f,
326
8.76k
          -0.3416446842253373f,
327
8.76k
          -0.08574019035519267f,
328
8.76k
          -0.047686803502292804f,
329
8.76k
          -0.14266084808807242f,
330
8.76k
      },
331
8.76k
      {
332
8.76k
          0.0,
333
8.76k
          0.0,
334
8.76k
          -0.13813540350758585f,
335
8.76k
          0.3302282550303788f,
336
8.76k
          0.0,
337
8.76k
          0.08755115000587084f,
338
8.76k
          -0.07946706605909573f,
339
8.76k
          -0.4613374887461511f,
340
8.76k
          -0.13813540350758294f,
341
8.76k
          -0.07946706605910261f,
342
8.76k
          0.49724647109535086f,
343
8.76k
          0.12538059448563663f,
344
8.76k
          0.3302282550303805f,
345
8.76k
          -0.4613374887461554f,
346
8.76k
          0.12538059448564315f,
347
8.76k
          -0.13813540350758452f,
348
8.76k
      },
349
8.76k
      {
350
8.76k
          0.0,
351
8.76k
          0.0,
352
8.76k
          -0.17437602599651067f,
353
8.76k
          0.0702790691196284f,
354
8.76k
          0.0,
355
8.76k
          -0.2921026642334881f,
356
8.76k
          0.3623817333531167f,
357
8.76k
          0.0,
358
8.76k
          -0.1743760259965108f,
359
8.76k
          0.36238173335311646f,
360
8.76k
          0.29210266423348785f,
361
8.76k
          -0.4326608024727445f,
362
8.76k
          0.07027906911962818f,
363
8.76k
          0.0,
364
8.76k
          -0.4326608024727457f,
365
8.76k
          0.34875205199302267f,
366
8.76k
      },
367
8.76k
      {
368
8.76k
          0.0,
369
8.76k
          0.0,
370
8.76k
          0.11354987314994337f,
371
8.76k
          -0.07417504595810355f,
372
8.76k
          0.0,
373
8.76k
          0.19402893032594343f,
374
8.76k
          -0.435190496523228f,
375
8.76k
          0.21918684838857466f,
376
8.76k
          0.11354987314994257f,
377
8.76k
          -0.4351904965232251f,
378
8.76k
          0.5550443808910661f,
379
8.76k
          -0.25468277124066463f,
380
8.76k
          -0.07417504595810233f,
381
8.76k
          0.2191868483885728f,
382
8.76k
          -0.25468277124066413f,
383
8.76k
          0.1135498731499429f,
384
8.76k
      },
385
8.76k
  };
386
387
8.76k
  const HWY_CAPPED(float, 16) d;
388
148k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
140k
    auto pixel = Zero(d);
390
2.38M
    for (size_t j = 0; j < 16; j++) {
391
2.24M
      auto cf = Set(d, coeffs[j]);
392
2.24M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.24M
      pixel = MulAdd(cf, basis, pixel);
394
2.24M
    }
395
140k
    Store(pixel, d, pixels + i);
396
140k
  }
397
8.76k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
8.76k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.76k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.76k
  size_t afv_x = afv_kind & 1;
404
8.76k
  size_t afv_y = afv_kind / 2;
405
8.76k
  float dcs[3] = {};
406
8.76k
  float block00 = coefficients[0];
407
8.76k
  float block01 = coefficients[1];
408
8.76k
  float block10 = coefficients[8];
409
8.76k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.76k
  dcs[1] = (block00 + block10 - block01);
411
8.76k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.76k
  HWY_ALIGN float coeff[4 * 4];
414
8.76k
  coeff[0] = dcs[0];
415
43.8k
  for (size_t iy = 0; iy < 4; iy++) {
416
175k
    for (size_t ix = 0; ix < 4; ix++) {
417
140k
      if (ix == 0 && iy == 0) continue;
418
131k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
131k
    }
420
35.0k
  }
421
8.76k
  HWY_ALIGN float block[4 * 8];
422
8.76k
  AFVIDCT4x4(coeff, block);
423
43.8k
  for (size_t iy = 0; iy < 4; iy++) {
424
175k
    for (size_t ix = 0; ix < 4; ix++) {
425
140k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
140k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
140k
    }
428
35.0k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.76k
  block[0] = dcs[1];
431
43.8k
  for (size_t iy = 0; iy < 4; iy++) {
432
175k
    for (size_t ix = 0; ix < 4; ix++) {
433
140k
      if (ix == 0 && iy == 0) continue;
434
131k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
131k
    }
436
35.0k
  }
437
8.76k
  ComputeScaledIDCT<4, 4>()(
438
8.76k
      block,
439
8.76k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.76k
            pixels_stride),
441
8.76k
      scratch_space);
442
  // IDCT4x8.
443
8.76k
  block[0] = dcs[2];
444
43.8k
  for (size_t iy = 0; iy < 4; iy++) {
445
315k
    for (size_t ix = 0; ix < 8; ix++) {
446
280k
      if (ix == 0 && iy == 0) continue;
447
271k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
271k
    }
449
35.0k
  }
450
8.76k
  ComputeScaledIDCT<4, 8>()(
451
8.76k
      block,
452
8.76k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.76k
      scratch_space);
454
8.76k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
78
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
78
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
78
  size_t afv_x = afv_kind & 1;
404
78
  size_t afv_y = afv_kind / 2;
405
78
  float dcs[3] = {};
406
78
  float block00 = coefficients[0];
407
78
  float block01 = coefficients[1];
408
78
  float block10 = coefficients[8];
409
78
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
78
  dcs[1] = (block00 + block10 - block01);
411
78
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
78
  HWY_ALIGN float coeff[4 * 4];
414
78
  coeff[0] = dcs[0];
415
390
  for (size_t iy = 0; iy < 4; iy++) {
416
1.56k
    for (size_t ix = 0; ix < 4; ix++) {
417
1.24k
      if (ix == 0 && iy == 0) continue;
418
1.17k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
1.17k
    }
420
312
  }
421
78
  HWY_ALIGN float block[4 * 8];
422
78
  AFVIDCT4x4(coeff, block);
423
390
  for (size_t iy = 0; iy < 4; iy++) {
424
1.56k
    for (size_t ix = 0; ix < 4; ix++) {
425
1.24k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
1.24k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
1.24k
    }
428
312
  }
429
  // IDCT4x4 in (odd, even) positions.
430
78
  block[0] = dcs[1];
431
390
  for (size_t iy = 0; iy < 4; iy++) {
432
1.56k
    for (size_t ix = 0; ix < 4; ix++) {
433
1.24k
      if (ix == 0 && iy == 0) continue;
434
1.17k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
1.17k
    }
436
312
  }
437
78
  ComputeScaledIDCT<4, 4>()(
438
78
      block,
439
78
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
78
            pixels_stride),
441
78
      scratch_space);
442
  // IDCT4x8.
443
78
  block[0] = dcs[2];
444
390
  for (size_t iy = 0; iy < 4; iy++) {
445
2.80k
    for (size_t ix = 0; ix < 8; ix++) {
446
2.49k
      if (ix == 0 && iy == 0) continue;
447
2.41k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
2.41k
    }
449
312
  }
450
78
  ComputeScaledIDCT<4, 8>()(
451
78
      block,
452
78
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
78
      scratch_space);
454
78
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
42
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
42
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
42
  size_t afv_x = afv_kind & 1;
404
42
  size_t afv_y = afv_kind / 2;
405
42
  float dcs[3] = {};
406
42
  float block00 = coefficients[0];
407
42
  float block01 = coefficients[1];
408
42
  float block10 = coefficients[8];
409
42
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
42
  dcs[1] = (block00 + block10 - block01);
411
42
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
42
  HWY_ALIGN float coeff[4 * 4];
414
42
  coeff[0] = dcs[0];
415
210
  for (size_t iy = 0; iy < 4; iy++) {
416
840
    for (size_t ix = 0; ix < 4; ix++) {
417
672
      if (ix == 0 && iy == 0) continue;
418
630
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
630
    }
420
168
  }
421
42
  HWY_ALIGN float block[4 * 8];
422
42
  AFVIDCT4x4(coeff, block);
423
210
  for (size_t iy = 0; iy < 4; iy++) {
424
840
    for (size_t ix = 0; ix < 4; ix++) {
425
672
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
672
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
672
    }
428
168
  }
429
  // IDCT4x4 in (odd, even) positions.
430
42
  block[0] = dcs[1];
431
210
  for (size_t iy = 0; iy < 4; iy++) {
432
840
    for (size_t ix = 0; ix < 4; ix++) {
433
672
      if (ix == 0 && iy == 0) continue;
434
630
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
630
    }
436
168
  }
437
42
  ComputeScaledIDCT<4, 4>()(
438
42
      block,
439
42
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
42
            pixels_stride),
441
42
      scratch_space);
442
  // IDCT4x8.
443
42
  block[0] = dcs[2];
444
210
  for (size_t iy = 0; iy < 4; iy++) {
445
1.51k
    for (size_t ix = 0; ix < 8; ix++) {
446
1.34k
      if (ix == 0 && iy == 0) continue;
447
1.30k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.30k
    }
449
168
  }
450
42
  ComputeScaledIDCT<4, 8>()(
451
42
      block,
452
42
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
42
      scratch_space);
454
42
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
630
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
630
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
630
  size_t afv_x = afv_kind & 1;
404
630
  size_t afv_y = afv_kind / 2;
405
630
  float dcs[3] = {};
406
630
  float block00 = coefficients[0];
407
630
  float block01 = coefficients[1];
408
630
  float block10 = coefficients[8];
409
630
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
630
  dcs[1] = (block00 + block10 - block01);
411
630
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
630
  HWY_ALIGN float coeff[4 * 4];
414
630
  coeff[0] = dcs[0];
415
3.15k
  for (size_t iy = 0; iy < 4; iy++) {
416
12.6k
    for (size_t ix = 0; ix < 4; ix++) {
417
10.0k
      if (ix == 0 && iy == 0) continue;
418
9.45k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
9.45k
    }
420
2.52k
  }
421
630
  HWY_ALIGN float block[4 * 8];
422
630
  AFVIDCT4x4(coeff, block);
423
3.15k
  for (size_t iy = 0; iy < 4; iy++) {
424
12.6k
    for (size_t ix = 0; ix < 4; ix++) {
425
10.0k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
10.0k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
10.0k
    }
428
2.52k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
630
  block[0] = dcs[1];
431
3.15k
  for (size_t iy = 0; iy < 4; iy++) {
432
12.6k
    for (size_t ix = 0; ix < 4; ix++) {
433
10.0k
      if (ix == 0 && iy == 0) continue;
434
9.45k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
9.45k
    }
436
2.52k
  }
437
630
  ComputeScaledIDCT<4, 4>()(
438
630
      block,
439
630
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
630
            pixels_stride),
441
630
      scratch_space);
442
  // IDCT4x8.
443
630
  block[0] = dcs[2];
444
3.15k
  for (size_t iy = 0; iy < 4; iy++) {
445
22.6k
    for (size_t ix = 0; ix < 8; ix++) {
446
20.1k
      if (ix == 0 && iy == 0) continue;
447
19.5k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
19.5k
    }
449
2.52k
  }
450
630
  ComputeScaledIDCT<4, 8>()(
451
630
      block,
452
630
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
630
      scratch_space);
454
630
}
dec_group.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
8.01k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.01k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.01k
  size_t afv_x = afv_kind & 1;
404
8.01k
  size_t afv_y = afv_kind / 2;
405
8.01k
  float dcs[3] = {};
406
8.01k
  float block00 = coefficients[0];
407
8.01k
  float block01 = coefficients[1];
408
8.01k
  float block10 = coefficients[8];
409
8.01k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.01k
  dcs[1] = (block00 + block10 - block01);
411
8.01k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.01k
  HWY_ALIGN float coeff[4 * 4];
414
8.01k
  coeff[0] = dcs[0];
415
40.0k
  for (size_t iy = 0; iy < 4; iy++) {
416
160k
    for (size_t ix = 0; ix < 4; ix++) {
417
128k
      if (ix == 0 && iy == 0) continue;
418
120k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
120k
    }
420
32.0k
  }
421
8.01k
  HWY_ALIGN float block[4 * 8];
422
8.01k
  AFVIDCT4x4(coeff, block);
423
40.0k
  for (size_t iy = 0; iy < 4; iy++) {
424
160k
    for (size_t ix = 0; ix < 4; ix++) {
425
128k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
128k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
128k
    }
428
32.0k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.01k
  block[0] = dcs[1];
431
40.0k
  for (size_t iy = 0; iy < 4; iy++) {
432
160k
    for (size_t ix = 0; ix < 4; ix++) {
433
128k
      if (ix == 0 && iy == 0) continue;
434
120k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
120k
    }
436
32.0k
  }
437
8.01k
  ComputeScaledIDCT<4, 4>()(
438
8.01k
      block,
439
8.01k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.01k
            pixels_stride),
441
8.01k
      scratch_space);
442
  // IDCT4x8.
443
8.01k
  block[0] = dcs[2];
444
40.0k
  for (size_t iy = 0; iy < 4; iy++) {
445
288k
    for (size_t ix = 0; ix < 8; ix++) {
446
256k
      if (ix == 0 && iy == 0) continue;
447
248k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
248k
    }
449
32.0k
  }
450
8.01k
  ComputeScaledIDCT<4, 8>()(
451
8.01k
      block,
452
8.01k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.01k
      scratch_space);
454
8.01k
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SCALAR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
117k
                                        float* scratch_space) {
461
117k
  using Type = AcStrategyType;
462
117k
  switch (strategy) {
463
23.0k
    case Type::IDENTITY: {
464
23.0k
      float dcs[4] = {};
465
23.0k
      float block00 = coefficients[0];
466
23.0k
      float block01 = coefficients[1];
467
23.0k
      float block10 = coefficients[8];
468
23.0k
      float block11 = coefficients[9];
469
23.0k
      dcs[0] = block00 + block01 + block10 + block11;
470
23.0k
      dcs[1] = block00 + block01 - block10 - block11;
471
23.0k
      dcs[2] = block00 - block01 + block10 - block11;
472
23.0k
      dcs[3] = block00 - block01 - block10 + block11;
473
69.2k
      for (size_t y = 0; y < 2; y++) {
474
138k
        for (size_t x = 0; x < 2; x++) {
475
92.3k
          float block_dc = dcs[y * 2 + x];
476
92.3k
          float residual_sum = 0;
477
461k
          for (size_t iy = 0; iy < 4; iy++) {
478
1.84M
            for (size_t ix = 0; ix < 4; ix++) {
479
1.47M
              if (ix == 0 && iy == 0) continue;
480
1.38M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.38M
            }
482
369k
          }
483
92.3k
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
92.3k
              block_dc - residual_sum * (1.0f / 16);
485
461k
          for (size_t iy = 0; iy < 4; iy++) {
486
1.84M
            for (size_t ix = 0; ix < 4; ix++) {
487
1.47M
              if (ix == 1 && iy == 1) continue;
488
1.38M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.38M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.38M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.38M
            }
492
369k
          }
493
92.3k
          pixels[y * 4 * pixels_stride + x * 4] =
494
92.3k
              coefficients[(y + 2) * 8 + x + 2] +
495
92.3k
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
92.3k
        }
497
46.1k
      }
498
23.0k
      break;
499
0
    }
500
1.18k
    case Type::DCT8X4: {
501
1.18k
      float dcs[2] = {};
502
1.18k
      float block0 = coefficients[0];
503
1.18k
      float block1 = coefficients[8];
504
1.18k
      dcs[0] = block0 + block1;
505
1.18k
      dcs[1] = block0 - block1;
506
3.54k
      for (size_t x = 0; x < 2; x++) {
507
2.36k
        HWY_ALIGN float block[4 * 8];
508
2.36k
        block[0] = dcs[x];
509
11.8k
        for (size_t iy = 0; iy < 4; iy++) {
510
85.1k
          for (size_t ix = 0; ix < 8; ix++) {
511
75.6k
            if (ix == 0 && iy == 0) continue;
512
73.2k
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
73.2k
          }
514
9.45k
        }
515
2.36k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.36k
                                  scratch_space);
517
2.36k
      }
518
1.18k
      break;
519
0
    }
520
918
    case Type::DCT4X8: {
521
918
      float dcs[2] = {};
522
918
      float block0 = coefficients[0];
523
918
      float block1 = coefficients[8];
524
918
      dcs[0] = block0 + block1;
525
918
      dcs[1] = block0 - block1;
526
2.75k
      for (size_t y = 0; y < 2; y++) {
527
1.83k
        HWY_ALIGN float block[4 * 8];
528
1.83k
        block[0] = dcs[y];
529
9.18k
        for (size_t iy = 0; iy < 4; iy++) {
530
66.0k
          for (size_t ix = 0; ix < 8; ix++) {
531
58.7k
            if (ix == 0 && iy == 0) continue;
532
56.9k
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
56.9k
          }
534
7.34k
        }
535
1.83k
        ComputeScaledIDCT<4, 8>()(
536
1.83k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
1.83k
            scratch_space);
538
1.83k
      }
539
918
      break;
540
0
    }
541
0
    case Type::DCT4X4: {
542
0
      float dcs[4] = {};
543
0
      float block00 = coefficients[0];
544
0
      float block01 = coefficients[1];
545
0
      float block10 = coefficients[8];
546
0
      float block11 = coefficients[9];
547
0
      dcs[0] = block00 + block01 + block10 + block11;
548
0
      dcs[1] = block00 + block01 - block10 - block11;
549
0
      dcs[2] = block00 - block01 + block10 - block11;
550
0
      dcs[3] = block00 - block01 - block10 + block11;
551
0
      for (size_t y = 0; y < 2; y++) {
552
0
        for (size_t x = 0; x < 2; x++) {
553
0
          HWY_ALIGN float block[4 * 4];
554
0
          block[0] = dcs[y * 2 + x];
555
0
          for (size_t iy = 0; iy < 4; iy++) {
556
0
            for (size_t ix = 0; ix < 4; ix++) {
557
0
              if (ix == 0 && iy == 0) continue;
558
0
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
0
            }
560
0
          }
561
0
          ComputeScaledIDCT<4, 4>()(
562
0
              block,
563
0
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
0
              scratch_space);
565
0
        }
566
0
      }
567
0
      break;
568
0
    }
569
165
    case Type::DCT2X2: {
570
165
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
165
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
165
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
165
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
165
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
1.48k
      for (size_t y = 0; y < kBlockDim; y++) {
576
11.8k
        for (size_t x = 0; x < kBlockDim; x++) {
577
10.5k
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
10.5k
        }
579
1.32k
      }
580
165
      break;
581
0
    }
582
1.10k
    case Type::DCT16X16: {
583
1.10k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
1.10k
                                  scratch_space);
585
1.10k
      break;
586
0
    }
587
522
    case Type::DCT16X8: {
588
522
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
522
                                 scratch_space);
590
522
      break;
591
0
    }
592
61.0k
    case Type::DCT8X16: {
593
61.0k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
61.0k
                                 scratch_space);
595
61.0k
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
294
    case Type::DCT32X16: {
608
294
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
294
                                  scratch_space);
610
294
      break;
611
0
    }
612
147
    case Type::DCT16X32: {
613
147
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
147
                                  scratch_space);
615
147
      break;
616
0
    }
617
138
    case Type::DCT32X32: {
618
138
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
138
                                  scratch_space);
620
138
      break;
621
0
    }
622
20.1k
    case Type::DCT: {
623
20.1k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
20.1k
                                scratch_space);
625
20.1k
      break;
626
0
    }
627
78
    case Type::AFV0: {
628
78
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
78
      break;
630
0
    }
631
42
    case Type::AFV1: {
632
42
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
42
      break;
634
0
    }
635
630
    case Type::AFV2: {
636
630
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
630
      break;
638
0
    }
639
8.01k
    case Type::AFV3: {
640
8.01k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
8.01k
      break;
642
0
    }
643
0
    case Type::DCT64X32: {
644
0
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT32X64: {
649
0
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT64X64: {
654
0
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
0
                                  scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
117k
  }
689
117k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
117k
                                        float* scratch_space) {
461
117k
  using Type = AcStrategyType;
462
117k
  switch (strategy) {
463
23.0k
    case Type::IDENTITY: {
464
23.0k
      float dcs[4] = {};
465
23.0k
      float block00 = coefficients[0];
466
23.0k
      float block01 = coefficients[1];
467
23.0k
      float block10 = coefficients[8];
468
23.0k
      float block11 = coefficients[9];
469
23.0k
      dcs[0] = block00 + block01 + block10 + block11;
470
23.0k
      dcs[1] = block00 + block01 - block10 - block11;
471
23.0k
      dcs[2] = block00 - block01 + block10 - block11;
472
23.0k
      dcs[3] = block00 - block01 - block10 + block11;
473
69.2k
      for (size_t y = 0; y < 2; y++) {
474
138k
        for (size_t x = 0; x < 2; x++) {
475
92.3k
          float block_dc = dcs[y * 2 + x];
476
92.3k
          float residual_sum = 0;
477
461k
          for (size_t iy = 0; iy < 4; iy++) {
478
1.84M
            for (size_t ix = 0; ix < 4; ix++) {
479
1.47M
              if (ix == 0 && iy == 0) continue;
480
1.38M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.38M
            }
482
369k
          }
483
92.3k
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
92.3k
              block_dc - residual_sum * (1.0f / 16);
485
461k
          for (size_t iy = 0; iy < 4; iy++) {
486
1.84M
            for (size_t ix = 0; ix < 4; ix++) {
487
1.47M
              if (ix == 1 && iy == 1) continue;
488
1.38M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.38M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.38M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.38M
            }
492
369k
          }
493
92.3k
          pixels[y * 4 * pixels_stride + x * 4] =
494
92.3k
              coefficients[(y + 2) * 8 + x + 2] +
495
92.3k
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
92.3k
        }
497
46.1k
      }
498
23.0k
      break;
499
0
    }
500
1.18k
    case Type::DCT8X4: {
501
1.18k
      float dcs[2] = {};
502
1.18k
      float block0 = coefficients[0];
503
1.18k
      float block1 = coefficients[8];
504
1.18k
      dcs[0] = block0 + block1;
505
1.18k
      dcs[1] = block0 - block1;
506
3.54k
      for (size_t x = 0; x < 2; x++) {
507
2.36k
        HWY_ALIGN float block[4 * 8];
508
2.36k
        block[0] = dcs[x];
509
11.8k
        for (size_t iy = 0; iy < 4; iy++) {
510
85.1k
          for (size_t ix = 0; ix < 8; ix++) {
511
75.6k
            if (ix == 0 && iy == 0) continue;
512
73.2k
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
73.2k
          }
514
9.45k
        }
515
2.36k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.36k
                                  scratch_space);
517
2.36k
      }
518
1.18k
      break;
519
0
    }
520
918
    case Type::DCT4X8: {
521
918
      float dcs[2] = {};
522
918
      float block0 = coefficients[0];
523
918
      float block1 = coefficients[8];
524
918
      dcs[0] = block0 + block1;
525
918
      dcs[1] = block0 - block1;
526
2.75k
      for (size_t y = 0; y < 2; y++) {
527
1.83k
        HWY_ALIGN float block[4 * 8];
528
1.83k
        block[0] = dcs[y];
529
9.18k
        for (size_t iy = 0; iy < 4; iy++) {
530
66.0k
          for (size_t ix = 0; ix < 8; ix++) {
531
58.7k
            if (ix == 0 && iy == 0) continue;
532
56.9k
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
56.9k
          }
534
7.34k
        }
535
1.83k
        ComputeScaledIDCT<4, 8>()(
536
1.83k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
1.83k
            scratch_space);
538
1.83k
      }
539
918
      break;
540
0
    }
541
0
    case Type::DCT4X4: {
542
0
      float dcs[4] = {};
543
0
      float block00 = coefficients[0];
544
0
      float block01 = coefficients[1];
545
0
      float block10 = coefficients[8];
546
0
      float block11 = coefficients[9];
547
0
      dcs[0] = block00 + block01 + block10 + block11;
548
0
      dcs[1] = block00 + block01 - block10 - block11;
549
0
      dcs[2] = block00 - block01 + block10 - block11;
550
0
      dcs[3] = block00 - block01 - block10 + block11;
551
0
      for (size_t y = 0; y < 2; y++) {
552
0
        for (size_t x = 0; x < 2; x++) {
553
0
          HWY_ALIGN float block[4 * 4];
554
0
          block[0] = dcs[y * 2 + x];
555
0
          for (size_t iy = 0; iy < 4; iy++) {
556
0
            for (size_t ix = 0; ix < 4; ix++) {
557
0
              if (ix == 0 && iy == 0) continue;
558
0
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
0
            }
560
0
          }
561
0
          ComputeScaledIDCT<4, 4>()(
562
0
              block,
563
0
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
0
              scratch_space);
565
0
        }
566
0
      }
567
0
      break;
568
0
    }
569
165
    case Type::DCT2X2: {
570
165
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
165
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
165
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
165
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
165
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
1.48k
      for (size_t y = 0; y < kBlockDim; y++) {
576
11.8k
        for (size_t x = 0; x < kBlockDim; x++) {
577
10.5k
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
10.5k
        }
579
1.32k
      }
580
165
      break;
581
0
    }
582
1.10k
    case Type::DCT16X16: {
583
1.10k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
1.10k
                                  scratch_space);
585
1.10k
      break;
586
0
    }
587
522
    case Type::DCT16X8: {
588
522
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
522
                                 scratch_space);
590
522
      break;
591
0
    }
592
61.0k
    case Type::DCT8X16: {
593
61.0k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
61.0k
                                 scratch_space);
595
61.0k
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
294
    case Type::DCT32X16: {
608
294
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
294
                                  scratch_space);
610
294
      break;
611
0
    }
612
147
    case Type::DCT16X32: {
613
147
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
147
                                  scratch_space);
615
147
      break;
616
0
    }
617
138
    case Type::DCT32X32: {
618
138
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
138
                                  scratch_space);
620
138
      break;
621
0
    }
622
20.1k
    case Type::DCT: {
623
20.1k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
20.1k
                                scratch_space);
625
20.1k
      break;
626
0
    }
627
78
    case Type::AFV0: {
628
78
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
78
      break;
630
0
    }
631
42
    case Type::AFV1: {
632
42
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
42
      break;
634
0
    }
635
630
    case Type::AFV2: {
636
630
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
630
      break;
638
0
    }
639
8.01k
    case Type::AFV3: {
640
8.01k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
8.01k
      break;
642
0
    }
643
0
    case Type::DCT64X32: {
644
0
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT32X64: {
649
0
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT64X64: {
654
0
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
0
                                  scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
117k
  }
689
117k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
116k
                                              float* JXL_RESTRICT scratch) {
695
116k
  using Type = AcStrategyType;
696
116k
  HWY_ALIGN float warm_block[4 * 4];
697
116k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
116k
  switch (strategy) {
699
522
    case Type::DCT16X8: {
700
522
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
522
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
522
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
522
      break;
704
0
    }
705
60.6k
    case Type::DCT8X16: {
706
60.6k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
60.6k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
60.6k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
60.6k
      break;
710
0
    }
711
1.10k
    case Type::DCT16X16: {
712
1.10k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
1.10k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
1.10k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
1.10k
      break;
716
0
    }
717
0
    case Type::DCT32X8: {
718
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
0
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
294
    case Type::DCT32X16: {
730
294
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
294
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
294
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
294
      break;
734
0
    }
735
147
    case Type::DCT16X32: {
736
147
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
147
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
147
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
147
      break;
740
0
    }
741
138
    case Type::DCT32X32: {
742
138
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
138
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
138
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
138
      break;
746
0
    }
747
0
    case Type::DCT64X32: {
748
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
0
      break;
752
0
    }
753
0
    case Type::DCT32X64: {
754
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
0
      break;
758
0
    }
759
0
    case Type::DCT64X64: {
760
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
0
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
20.1k
    case Type::DCT:
806
20.3k
    case Type::DCT2X2:
807
20.3k
    case Type::DCT4X4:
808
21.2k
    case Type::DCT4X8:
809
22.4k
    case Type::DCT8X4:
810
22.5k
    case Type::AFV0:
811
22.5k
    case Type::AFV1:
812
23.1k
    case Type::AFV2:
813
31.1k
    case Type::AFV3:
814
54.2k
    case Type::IDENTITY:
815
54.2k
      llf[0] = dc[0];
816
54.2k
      break;
817
116k
  };
818
116k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
116k
                                              float* JXL_RESTRICT scratch) {
695
116k
  using Type = AcStrategyType;
696
116k
  HWY_ALIGN float warm_block[4 * 4];
697
116k
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
116k
  switch (strategy) {
699
522
    case Type::DCT16X8: {
700
522
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
522
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
522
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
522
      break;
704
0
    }
705
60.6k
    case Type::DCT8X16: {
706
60.6k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
60.6k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
60.6k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
60.6k
      break;
710
0
    }
711
1.10k
    case Type::DCT16X16: {
712
1.10k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
1.10k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
1.10k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
1.10k
      break;
716
0
    }
717
0
    case Type::DCT32X8: {
718
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
0
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
294
    case Type::DCT32X16: {
730
294
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
294
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
294
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
294
      break;
734
0
    }
735
147
    case Type::DCT16X32: {
736
147
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
147
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
147
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
147
      break;
740
0
    }
741
138
    case Type::DCT32X32: {
742
138
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
138
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
138
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
138
      break;
746
0
    }
747
0
    case Type::DCT64X32: {
748
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
0
      break;
752
0
    }
753
0
    case Type::DCT32X64: {
754
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
0
      break;
758
0
    }
759
0
    case Type::DCT64X64: {
760
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
0
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
0
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
20.1k
    case Type::DCT:
806
20.3k
    case Type::DCT2X2:
807
20.3k
    case Type::DCT4X4:
808
21.2k
    case Type::DCT4X8:
809
22.4k
    case Type::DCT8X4:
810
22.5k
    case Type::AFV0:
811
22.5k
    case Type::AFV1:
812
23.1k
    case Type::AFV2:
813
31.1k
    case Type::AFV3:
814
54.2k
    case Type::IDENTITY:
815
54.2k
      llf[0] = dc[0];
816
54.2k
      break;
817
116k
  };
818
116k
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SCALAR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_