Coverage Report

Created: 2025-07-23 08:18

/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.53M
                                  float* JXL_RESTRICT scratch_space) {
41
2.53M
  static_assert(LF_ROWS == ROWS,
42
2.53M
                "ReinterpretingDCT should only be called with LF == N");
43
2.53M
  static_assert(LF_COLS == COLS,
44
2.53M
                "ReinterpretingDCT should only be called with LF == N");
45
2.53M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.53M
                                 scratch_space);
47
2.53M
  if (ROWS < COLS) {
48
2.42M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
5.42M
      for (size_t x = 0; x < LF_COLS; x++) {
50
4.07M
        output[y * output_stride + x] =
51
4.07M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
4.07M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
4.07M
      }
54
1.34M
    }
55
1.45M
  } else {
56
4.28M
    for (size_t y = 0; y < LF_COLS; y++) {
57
13.0M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
10.2M
        output[y * output_stride + x] =
59
10.2M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
10.2M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
10.2M
      }
62
2.82M
    }
63
1.45M
  }
64
2.53M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
708k
                                  float* JXL_RESTRICT scratch_space) {
41
708k
  static_assert(LF_ROWS == ROWS,
42
708k
                "ReinterpretingDCT should only be called with LF == N");
43
708k
  static_assert(LF_COLS == COLS,
44
708k
                "ReinterpretingDCT should only be called with LF == N");
45
708k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
708k
                                 scratch_space);
47
708k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
708k
  } else {
56
1.41M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.12M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.41M
        output[y * output_stride + x] =
59
1.41M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.41M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.41M
      }
62
708k
    }
63
708k
  }
64
708k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
839k
                                  float* JXL_RESTRICT scratch_space) {
41
839k
  static_assert(LF_ROWS == ROWS,
42
839k
                "ReinterpretingDCT should only be called with LF == N");
43
839k
  static_assert(LF_COLS == COLS,
44
839k
                "ReinterpretingDCT should only be called with LF == N");
45
839k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
839k
                                 scratch_space);
47
839k
  if (ROWS < COLS) {
48
1.67M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.51M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.67M
        output[y * output_stride + x] =
51
1.67M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.67M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.67M
      }
54
839k
    }
55
839k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
839k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
437k
                                  float* JXL_RESTRICT scratch_space) {
41
437k
  static_assert(LF_ROWS == ROWS,
42
437k
                "ReinterpretingDCT should only be called with LF == N");
43
437k
  static_assert(LF_COLS == COLS,
44
437k
                "ReinterpretingDCT should only be called with LF == N");
45
437k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
437k
                                 scratch_space);
47
437k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
437k
  } else {
56
1.31M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.62M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.74M
        output[y * output_stride + x] =
59
1.74M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.74M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.74M
      }
62
874k
    }
63
437k
  }
64
437k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
54
                                  float* JXL_RESTRICT scratch_space) {
41
54
  static_assert(LF_ROWS == ROWS,
42
54
                "ReinterpretingDCT should only be called with LF == N");
43
54
  static_assert(LF_COLS == COLS,
44
54
                "ReinterpretingDCT should only be called with LF == N");
45
54
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
54
                                 scratch_space);
47
54
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
54
  } else {
56
108
    for (size_t y = 0; y < LF_COLS; y++) {
57
270
      for (size_t x = 0; x < LF_ROWS; x++) {
58
216
        output[y * output_stride + x] =
59
216
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
216
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
216
      }
62
54
    }
63
54
  }
64
54
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
9
                                  float* JXL_RESTRICT scratch_space) {
41
9
  static_assert(LF_ROWS == ROWS,
42
9
                "ReinterpretingDCT should only be called with LF == N");
43
9
  static_assert(LF_COLS == COLS,
44
9
                "ReinterpretingDCT should only be called with LF == N");
45
9
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
9
                                 scratch_space);
47
9
  if (ROWS < COLS) {
48
18
    for (size_t y = 0; y < LF_ROWS; y++) {
49
45
      for (size_t x = 0; x < LF_COLS; x++) {
50
36
        output[y * output_stride + x] =
51
36
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
36
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
36
      }
54
9
    }
55
9
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
9
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
123k
                                  float* JXL_RESTRICT scratch_space) {
41
123k
  static_assert(LF_ROWS == ROWS,
42
123k
                "ReinterpretingDCT should only be called with LF == N");
43
123k
  static_assert(LF_COLS == COLS,
44
123k
                "ReinterpretingDCT should only be called with LF == N");
45
123k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
123k
                                 scratch_space);
47
123k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
123k
  } else {
56
369k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.23M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
985k
        output[y * output_stride + x] =
59
985k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
985k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
985k
      }
62
246k
    }
63
123k
  }
64
123k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
210k
                                  float* JXL_RESTRICT scratch_space) {
41
210k
  static_assert(LF_ROWS == ROWS,
42
210k
                "ReinterpretingDCT should only be called with LF == N");
43
210k
  static_assert(LF_COLS == COLS,
44
210k
                "ReinterpretingDCT should only be called with LF == N");
45
210k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
210k
                                 scratch_space);
47
210k
  if (ROWS < COLS) {
48
632k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.10M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.68M
        output[y * output_stride + x] =
51
1.68M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.68M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.68M
      }
54
421k
    }
55
210k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
210k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
116k
                                  float* JXL_RESTRICT scratch_space) {
41
116k
  static_assert(LF_ROWS == ROWS,
42
116k
                "ReinterpretingDCT should only be called with LF == N");
43
116k
  static_assert(LF_COLS == COLS,
44
116k
                "ReinterpretingDCT should only be called with LF == N");
45
116k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
116k
                                 scratch_space);
47
116k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
116k
  } else {
56
583k
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.33M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.86M
        output[y * output_stride + x] =
59
1.86M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.86M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.86M
      }
62
467k
    }
63
116k
  }
64
116k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
14.3k
                                  float* JXL_RESTRICT scratch_space) {
41
14.3k
  static_assert(LF_ROWS == ROWS,
42
14.3k
                "ReinterpretingDCT should only be called with LF == N");
43
14.3k
  static_assert(LF_COLS == COLS,
44
14.3k
                "ReinterpretingDCT should only be called with LF == N");
45
14.3k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
14.3k
                                 scratch_space);
47
14.3k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
14.3k
  } else {
56
71.5k
    for (size_t y = 0; y < LF_COLS; y++) {
57
514k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
457k
        output[y * output_stride + x] =
59
457k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
457k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
457k
      }
62
57.2k
    }
63
14.3k
  }
64
14.3k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
22.0k
                                  float* JXL_RESTRICT scratch_space) {
41
22.0k
  static_assert(LF_ROWS == ROWS,
42
22.0k
                "ReinterpretingDCT should only be called with LF == N");
43
22.0k
  static_assert(LF_COLS == COLS,
44
22.0k
                "ReinterpretingDCT should only be called with LF == N");
45
22.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
22.0k
                                 scratch_space);
47
22.0k
  if (ROWS < COLS) {
48
110k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
794k
      for (size_t x = 0; x < LF_COLS; x++) {
50
706k
        output[y * output_stride + x] =
51
706k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
706k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
706k
      }
54
88.3k
    }
55
22.0k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
22.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
58.7k
                                  float* JXL_RESTRICT scratch_space) {
41
58.7k
  static_assert(LF_ROWS == ROWS,
42
58.7k
                "ReinterpretingDCT should only be called with LF == N");
43
58.7k
  static_assert(LF_COLS == COLS,
44
58.7k
                "ReinterpretingDCT should only be called with LF == N");
45
58.7k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
58.7k
                                 scratch_space);
47
58.7k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
58.7k
  } else {
56
528k
    for (size_t y = 0; y < LF_COLS; y++) {
57
4.22M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
3.75M
        output[y * output_stride + x] =
59
3.75M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
3.75M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
3.75M
      }
62
469k
    }
63
58.7k
  }
64
58.7k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
33.5M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
33.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
33.5M
  static_assert(S % 2 == 0, "S should be even");
70
33.5M
  float temp[kDCTBlockSize];
71
33.5M
  constexpr size_t num_2x2 = S / 2;
72
111M
  for (size_t y = 0; y < num_2x2; y++) {
73
313M
    for (size_t x = 0; x < num_2x2; x++) {
74
234M
      float c00 = block[y * kBlockDim + x];
75
234M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
234M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
234M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
234M
      float r00 = c00 + c01 + c10 + c11;
79
234M
      float r01 = c00 + c01 - c10 - c11;
80
234M
      float r10 = c00 - c01 + c10 - c11;
81
234M
      float r11 = c00 - c01 - c10 + c11;
82
234M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
234M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
234M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
234M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
234M
    }
87
78.2M
  }
88
190M
  for (size_t y = 0; y < S; y++) {
89
1.09G
    for (size_t x = 0; x < S; x++) {
90
939M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
939M
    }
92
156M
  }
93
33.5M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.48M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
2.48M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.48M
  static_assert(S % 2 == 0, "S should be even");
70
2.48M
  float temp[kDCTBlockSize];
71
2.48M
  constexpr size_t num_2x2 = S / 2;
72
4.96M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.96M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.48M
      float c00 = block[y * kBlockDim + x];
75
2.48M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
2.48M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
2.48M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
2.48M
      float r00 = c00 + c01 + c10 + c11;
79
2.48M
      float r01 = c00 + c01 - c10 - c11;
80
2.48M
      float r10 = c00 - c01 + c10 - c11;
81
2.48M
      float r11 = c00 - c01 - c10 + c11;
82
2.48M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
2.48M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
2.48M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
2.48M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
2.48M
    }
87
2.48M
  }
88
7.44M
  for (size_t y = 0; y < S; y++) {
89
14.8M
    for (size_t x = 0; x < S; x++) {
90
9.92M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
9.92M
    }
92
4.96M
  }
93
2.48M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.48M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
2.48M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.48M
  static_assert(S % 2 == 0, "S should be even");
70
2.48M
  float temp[kDCTBlockSize];
71
2.48M
  constexpr size_t num_2x2 = S / 2;
72
7.44M
  for (size_t y = 0; y < num_2x2; y++) {
73
14.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.92M
      float c00 = block[y * kBlockDim + x];
75
9.92M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
9.92M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
9.92M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
9.92M
      float r00 = c00 + c01 + c10 + c11;
79
9.92M
      float r01 = c00 + c01 - c10 - c11;
80
9.92M
      float r10 = c00 - c01 + c10 - c11;
81
9.92M
      float r11 = c00 - c01 - c10 + c11;
82
9.92M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
9.92M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
9.92M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
9.92M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
9.92M
    }
87
4.96M
  }
88
12.4M
  for (size_t y = 0; y < S; y++) {
89
49.6M
    for (size_t x = 0; x < S; x++) {
90
39.6M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
39.6M
    }
92
9.92M
  }
93
2.48M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.48M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
2.48M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.48M
  static_assert(S % 2 == 0, "S should be even");
70
2.48M
  float temp[kDCTBlockSize];
71
2.48M
  constexpr size_t num_2x2 = S / 2;
72
12.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
49.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
39.6M
      float c00 = block[y * kBlockDim + x];
75
39.6M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
39.6M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
39.6M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
39.6M
      float r00 = c00 + c01 + c10 + c11;
79
39.6M
      float r01 = c00 + c01 - c10 - c11;
80
39.6M
      float r10 = c00 - c01 + c10 - c11;
81
39.6M
      float r11 = c00 - c01 - c10 + c11;
82
39.6M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
39.6M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
39.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
39.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
39.6M
    }
87
9.92M
  }
88
22.3M
  for (size_t y = 0; y < S; y++) {
89
178M
    for (size_t x = 0; x < S; x++) {
90
158M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
158M
    }
92
19.8M
  }
93
2.48M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
17.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
17.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.69M
      float c00 = block[y * kBlockDim + x];
75
8.69M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
8.69M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
8.69M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
8.69M
      float r00 = c00 + c01 + c10 + c11;
79
8.69M
      float r01 = c00 + c01 - c10 - c11;
80
8.69M
      float r10 = c00 - c01 + c10 - c11;
81
8.69M
      float r11 = c00 - c01 - c10 + c11;
82
8.69M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
8.69M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
8.69M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
8.69M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
8.69M
    }
87
8.69M
  }
88
26.0M
  for (size_t y = 0; y < S; y++) {
89
52.1M
    for (size_t x = 0; x < S; x++) {
90
34.7M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
34.7M
    }
92
17.3M
  }
93
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
26.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
52.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
34.7M
      float c00 = block[y * kBlockDim + x];
75
34.7M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
34.7M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
34.7M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
34.7M
      float r00 = c00 + c01 + c10 + c11;
79
34.7M
      float r01 = c00 + c01 - c10 - c11;
80
34.7M
      float r10 = c00 - c01 + c10 - c11;
81
34.7M
      float r11 = c00 - c01 - c10 + c11;
82
34.7M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
34.7M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
34.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
34.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
34.7M
    }
87
17.3M
  }
88
43.4M
  for (size_t y = 0; y < S; y++) {
89
173M
    for (size_t x = 0; x < S; x++) {
90
139M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
139M
    }
92
34.7M
  }
93
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
43.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
173M
    for (size_t x = 0; x < num_2x2; x++) {
74
139M
      float c00 = block[y * kBlockDim + x];
75
139M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
139M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
139M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
139M
      float r00 = c00 + c01 + c10 + c11;
79
139M
      float r01 = c00 + c01 - c10 - c11;
80
139M
      float r10 = c00 - c01 + c10 - c11;
81
139M
      float r11 = c00 - c01 - c10 + c11;
82
139M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
139M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
139M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
139M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
139M
    }
87
34.7M
  }
88
78.2M
  for (size_t y = 0; y < S; y++) {
89
626M
    for (size_t x = 0; x < S; x++) {
90
556M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
556M
    }
92
69.5M
  }
93
8.69M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
36.8M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
36.8M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
36.8M
      {
98
36.8M
          0.25,
99
36.8M
          0.25,
100
36.8M
          0.25,
101
36.8M
          0.25,
102
36.8M
          0.25,
103
36.8M
          0.25,
104
36.8M
          0.25,
105
36.8M
          0.25,
106
36.8M
          0.25,
107
36.8M
          0.25,
108
36.8M
          0.25,
109
36.8M
          0.25,
110
36.8M
          0.25,
111
36.8M
          0.25,
112
36.8M
          0.25,
113
36.8M
          0.25,
114
36.8M
      },
115
36.8M
      {
116
36.8M
          0.876902929799142f,
117
36.8M
          0.2206518106944235f,
118
36.8M
          -0.10140050393753763f,
119
36.8M
          -0.1014005039375375f,
120
36.8M
          0.2206518106944236f,
121
36.8M
          -0.10140050393753777f,
122
36.8M
          -0.10140050393753772f,
123
36.8M
          -0.10140050393753763f,
124
36.8M
          -0.10140050393753758f,
125
36.8M
          -0.10140050393753769f,
126
36.8M
          -0.1014005039375375f,
127
36.8M
          -0.10140050393753768f,
128
36.8M
          -0.10140050393753768f,
129
36.8M
          -0.10140050393753759f,
130
36.8M
          -0.10140050393753763f,
131
36.8M
          -0.10140050393753741f,
132
36.8M
      },
133
36.8M
      {
134
36.8M
          0.0,
135
36.8M
          0.0,
136
36.8M
          0.40670075830260755f,
137
36.8M
          0.44444816619734445f,
138
36.8M
          0.0,
139
36.8M
          0.0,
140
36.8M
          0.19574399372042936f,
141
36.8M
          0.2929100136981264f,
142
36.8M
          -0.40670075830260716f,
143
36.8M
          -0.19574399372042872f,
144
36.8M
          0.0,
145
36.8M
          0.11379074460448091f,
146
36.8M
          -0.44444816619734384f,
147
36.8M
          -0.29291001369812636f,
148
36.8M
          -0.1137907446044814f,
149
36.8M
          0.0,
150
36.8M
      },
151
36.8M
      {
152
36.8M
          0.0,
153
36.8M
          0.0,
154
36.8M
          -0.21255748058288748f,
155
36.8M
          0.3085497062849767f,
156
36.8M
          0.0,
157
36.8M
          0.4706702258572536f,
158
36.8M
          -0.1621205195722993f,
159
36.8M
          0.0,
160
36.8M
          -0.21255748058287047f,
161
36.8M
          -0.16212051957228327f,
162
36.8M
          -0.47067022585725277f,
163
36.8M
          -0.1464291867126764f,
164
36.8M
          0.3085497062849487f,
165
36.8M
          0.0,
166
36.8M
          -0.14642918671266536f,
167
36.8M
          0.4251149611657548f,
168
36.8M
      },
169
36.8M
      {
170
36.8M
          0.0,
171
36.8M
          -0.7071067811865474f,
172
36.8M
          0.0,
173
36.8M
          0.0,
174
36.8M
          0.7071067811865476f,
175
36.8M
          0.0,
176
36.8M
          0.0,
177
36.8M
          0.0,
178
36.8M
          0.0,
179
36.8M
          0.0,
180
36.8M
          0.0,
181
36.8M
          0.0,
182
36.8M
          0.0,
183
36.8M
          0.0,
184
36.8M
          0.0,
185
36.8M
          0.0,
186
36.8M
      },
187
36.8M
      {
188
36.8M
          -0.4105377591765233f,
189
36.8M
          0.6235485373547691f,
190
36.8M
          -0.06435071657946274f,
191
36.8M
          -0.06435071657946266f,
192
36.8M
          0.6235485373547694f,
193
36.8M
          -0.06435071657946284f,
194
36.8M
          -0.0643507165794628f,
195
36.8M
          -0.06435071657946274f,
196
36.8M
          -0.06435071657946272f,
197
36.8M
          -0.06435071657946279f,
198
36.8M
          -0.06435071657946266f,
199
36.8M
          -0.06435071657946277f,
200
36.8M
          -0.06435071657946277f,
201
36.8M
          -0.06435071657946273f,
202
36.8M
          -0.06435071657946274f,
203
36.8M
          -0.0643507165794626f,
204
36.8M
      },
205
36.8M
      {
206
36.8M
          0.0,
207
36.8M
          0.0,
208
36.8M
          -0.4517556589999482f,
209
36.8M
          0.15854503551840063f,
210
36.8M
          0.0,
211
36.8M
          -0.04038515160822202f,
212
36.8M
          0.0074182263792423875f,
213
36.8M
          0.39351034269210167f,
214
36.8M
          -0.45175565899994635f,
215
36.8M
          0.007418226379244351f,
216
36.8M
          0.1107416575309343f,
217
36.8M
          0.08298163094882051f,
218
36.8M
          0.15854503551839705f,
219
36.8M
          0.3935103426921022f,
220
36.8M
          0.0829816309488214f,
221
36.8M
          -0.45175565899994796f,
222
36.8M
      },
223
36.8M
      {
224
36.8M
          0.0,
225
36.8M
          0.0,
226
36.8M
          -0.304684750724869f,
227
36.8M
          0.5112616136591823f,
228
36.8M
          0.0,
229
36.8M
          0.0,
230
36.8M
          -0.290480129728998f,
231
36.8M
          -0.06578701549142804f,
232
36.8M
          0.304684750724884f,
233
36.8M
          0.2904801297290076f,
234
36.8M
          0.0,
235
36.8M
          -0.23889773523344604f,
236
36.8M
          -0.5112616136592012f,
237
36.8M
          0.06578701549142545f,
238
36.8M
          0.23889773523345467f,
239
36.8M
          0.0,
240
36.8M
      },
241
36.8M
      {
242
36.8M
          0.0,
243
36.8M
          0.0,
244
36.8M
          0.3017929516615495f,
245
36.8M
          0.25792362796341184f,
246
36.8M
          0.0,
247
36.8M
          0.16272340142866204f,
248
36.8M
          0.09520022653475037f,
249
36.8M
          0.0,
250
36.8M
          0.3017929516615503f,
251
36.8M
          0.09520022653475055f,
252
36.8M
          -0.16272340142866173f,
253
36.8M
          -0.35312385449816297f,
254
36.8M
          0.25792362796341295f,
255
36.8M
          0.0,
256
36.8M
          -0.3531238544981624f,
257
36.8M
          -0.6035859033230976f,
258
36.8M
      },
259
36.8M
      {
260
36.8M
          0.0,
261
36.8M
          0.0,
262
36.8M
          0.40824829046386274f,
263
36.8M
          0.0,
264
36.8M
          0.0,
265
36.8M
          0.0,
266
36.8M
          0.0,
267
36.8M
          -0.4082482904638628f,
268
36.8M
          -0.4082482904638635f,
269
36.8M
          0.0,
270
36.8M
          0.0,
271
36.8M
          -0.40824829046386296f,
272
36.8M
          0.0,
273
36.8M
          0.4082482904638634f,
274
36.8M
          0.408248290463863f,
275
36.8M
          0.0,
276
36.8M
      },
277
36.8M
      {
278
36.8M
          0.0,
279
36.8M
          0.0,
280
36.8M
          0.1747866975480809f,
281
36.8M
          0.0812611176717539f,
282
36.8M
          0.0,
283
36.8M
          0.0,
284
36.8M
          -0.3675398009862027f,
285
36.8M
          -0.307882213957909f,
286
36.8M
          -0.17478669754808135f,
287
36.8M
          0.3675398009862011f,
288
36.8M
          0.0,
289
36.8M
          0.4826689115059883f,
290
36.8M
          -0.08126111767175039f,
291
36.8M
          0.30788221395790305f,
292
36.8M
          -0.48266891150598584f,
293
36.8M
          0.0,
294
36.8M
      },
295
36.8M
      {
296
36.8M
          0.0,
297
36.8M
          0.0,
298
36.8M
          -0.21105601049335784f,
299
36.8M
          0.18567180916109802f,
300
36.8M
          0.0,
301
36.8M
          0.0,
302
36.8M
          0.49215859013738733f,
303
36.8M
          -0.38525013709251915f,
304
36.8M
          0.21105601049335806f,
305
36.8M
          -0.49215859013738905f,
306
36.8M
          0.0,
307
36.8M
          0.17419412659916217f,
308
36.8M
          -0.18567180916109904f,
309
36.8M
          0.3852501370925211f,
310
36.8M
          -0.1741941265991621f,
311
36.8M
          0.0,
312
36.8M
      },
313
36.8M
      {
314
36.8M
          0.0,
315
36.8M
          0.0,
316
36.8M
          -0.14266084808807264f,
317
36.8M
          -0.3416446842253372f,
318
36.8M
          0.0,
319
36.8M
          0.7367497537172237f,
320
36.8M
          0.24627107722075148f,
321
36.8M
          -0.08574019035519306f,
322
36.8M
          -0.14266084808807344f,
323
36.8M
          0.24627107722075137f,
324
36.8M
          0.14883399227113567f,
325
36.8M
          -0.04768680350229251f,
326
36.8M
          -0.3416446842253373f,
327
36.8M
          -0.08574019035519267f,
328
36.8M
          -0.047686803502292804f,
329
36.8M
          -0.14266084808807242f,
330
36.8M
      },
331
36.8M
      {
332
36.8M
          0.0,
333
36.8M
          0.0,
334
36.8M
          -0.13813540350758585f,
335
36.8M
          0.3302282550303788f,
336
36.8M
          0.0,
337
36.8M
          0.08755115000587084f,
338
36.8M
          -0.07946706605909573f,
339
36.8M
          -0.4613374887461511f,
340
36.8M
          -0.13813540350758294f,
341
36.8M
          -0.07946706605910261f,
342
36.8M
          0.49724647109535086f,
343
36.8M
          0.12538059448563663f,
344
36.8M
          0.3302282550303805f,
345
36.8M
          -0.4613374887461554f,
346
36.8M
          0.12538059448564315f,
347
36.8M
          -0.13813540350758452f,
348
36.8M
      },
349
36.8M
      {
350
36.8M
          0.0,
351
36.8M
          0.0,
352
36.8M
          -0.17437602599651067f,
353
36.8M
          0.0702790691196284f,
354
36.8M
          0.0,
355
36.8M
          -0.2921026642334881f,
356
36.8M
          0.3623817333531167f,
357
36.8M
          0.0,
358
36.8M
          -0.1743760259965108f,
359
36.8M
          0.36238173335311646f,
360
36.8M
          0.29210266423348785f,
361
36.8M
          -0.4326608024727445f,
362
36.8M
          0.07027906911962818f,
363
36.8M
          0.0,
364
36.8M
          -0.4326608024727457f,
365
36.8M
          0.34875205199302267f,
366
36.8M
      },
367
36.8M
      {
368
36.8M
          0.0,
369
36.8M
          0.0,
370
36.8M
          0.11354987314994337f,
371
36.8M
          -0.07417504595810355f,
372
36.8M
          0.0,
373
36.8M
          0.19402893032594343f,
374
36.8M
          -0.435190496523228f,
375
36.8M
          0.21918684838857466f,
376
36.8M
          0.11354987314994257f,
377
36.8M
          -0.4351904965232251f,
378
36.8M
          0.5550443808910661f,
379
36.8M
          -0.25468277124066463f,
380
36.8M
          -0.07417504595810233f,
381
36.8M
          0.2191868483885728f,
382
36.8M
          -0.25468277124066413f,
383
36.8M
          0.1135498731499429f,
384
36.8M
      },
385
36.8M
  };
386
387
36.8M
  const HWY_CAPPED(float, 16) d;
388
110M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
73.7M
    auto pixel = Zero(d);
390
1.25G
    for (size_t j = 0; j < 16; j++) {
391
1.17G
      auto cf = Set(d, coeffs[j]);
392
1.17G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.17G
      pixel = MulAdd(cf, basis, pixel);
394
1.17G
    }
395
73.7M
    Store(pixel, d, pixels + i);
396
73.7M
  }
397
36.8M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
2.05M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
2.05M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
2.05M
      {
98
2.05M
          0.25,
99
2.05M
          0.25,
100
2.05M
          0.25,
101
2.05M
          0.25,
102
2.05M
          0.25,
103
2.05M
          0.25,
104
2.05M
          0.25,
105
2.05M
          0.25,
106
2.05M
          0.25,
107
2.05M
          0.25,
108
2.05M
          0.25,
109
2.05M
          0.25,
110
2.05M
          0.25,
111
2.05M
          0.25,
112
2.05M
          0.25,
113
2.05M
          0.25,
114
2.05M
      },
115
2.05M
      {
116
2.05M
          0.876902929799142f,
117
2.05M
          0.2206518106944235f,
118
2.05M
          -0.10140050393753763f,
119
2.05M
          -0.1014005039375375f,
120
2.05M
          0.2206518106944236f,
121
2.05M
          -0.10140050393753777f,
122
2.05M
          -0.10140050393753772f,
123
2.05M
          -0.10140050393753763f,
124
2.05M
          -0.10140050393753758f,
125
2.05M
          -0.10140050393753769f,
126
2.05M
          -0.1014005039375375f,
127
2.05M
          -0.10140050393753768f,
128
2.05M
          -0.10140050393753768f,
129
2.05M
          -0.10140050393753759f,
130
2.05M
          -0.10140050393753763f,
131
2.05M
          -0.10140050393753741f,
132
2.05M
      },
133
2.05M
      {
134
2.05M
          0.0,
135
2.05M
          0.0,
136
2.05M
          0.40670075830260755f,
137
2.05M
          0.44444816619734445f,
138
2.05M
          0.0,
139
2.05M
          0.0,
140
2.05M
          0.19574399372042936f,
141
2.05M
          0.2929100136981264f,
142
2.05M
          -0.40670075830260716f,
143
2.05M
          -0.19574399372042872f,
144
2.05M
          0.0,
145
2.05M
          0.11379074460448091f,
146
2.05M
          -0.44444816619734384f,
147
2.05M
          -0.29291001369812636f,
148
2.05M
          -0.1137907446044814f,
149
2.05M
          0.0,
150
2.05M
      },
151
2.05M
      {
152
2.05M
          0.0,
153
2.05M
          0.0,
154
2.05M
          -0.21255748058288748f,
155
2.05M
          0.3085497062849767f,
156
2.05M
          0.0,
157
2.05M
          0.4706702258572536f,
158
2.05M
          -0.1621205195722993f,
159
2.05M
          0.0,
160
2.05M
          -0.21255748058287047f,
161
2.05M
          -0.16212051957228327f,
162
2.05M
          -0.47067022585725277f,
163
2.05M
          -0.1464291867126764f,
164
2.05M
          0.3085497062849487f,
165
2.05M
          0.0,
166
2.05M
          -0.14642918671266536f,
167
2.05M
          0.4251149611657548f,
168
2.05M
      },
169
2.05M
      {
170
2.05M
          0.0,
171
2.05M
          -0.7071067811865474f,
172
2.05M
          0.0,
173
2.05M
          0.0,
174
2.05M
          0.7071067811865476f,
175
2.05M
          0.0,
176
2.05M
          0.0,
177
2.05M
          0.0,
178
2.05M
          0.0,
179
2.05M
          0.0,
180
2.05M
          0.0,
181
2.05M
          0.0,
182
2.05M
          0.0,
183
2.05M
          0.0,
184
2.05M
          0.0,
185
2.05M
          0.0,
186
2.05M
      },
187
2.05M
      {
188
2.05M
          -0.4105377591765233f,
189
2.05M
          0.6235485373547691f,
190
2.05M
          -0.06435071657946274f,
191
2.05M
          -0.06435071657946266f,
192
2.05M
          0.6235485373547694f,
193
2.05M
          -0.06435071657946284f,
194
2.05M
          -0.0643507165794628f,
195
2.05M
          -0.06435071657946274f,
196
2.05M
          -0.06435071657946272f,
197
2.05M
          -0.06435071657946279f,
198
2.05M
          -0.06435071657946266f,
199
2.05M
          -0.06435071657946277f,
200
2.05M
          -0.06435071657946277f,
201
2.05M
          -0.06435071657946273f,
202
2.05M
          -0.06435071657946274f,
203
2.05M
          -0.0643507165794626f,
204
2.05M
      },
205
2.05M
      {
206
2.05M
          0.0,
207
2.05M
          0.0,
208
2.05M
          -0.4517556589999482f,
209
2.05M
          0.15854503551840063f,
210
2.05M
          0.0,
211
2.05M
          -0.04038515160822202f,
212
2.05M
          0.0074182263792423875f,
213
2.05M
          0.39351034269210167f,
214
2.05M
          -0.45175565899994635f,
215
2.05M
          0.007418226379244351f,
216
2.05M
          0.1107416575309343f,
217
2.05M
          0.08298163094882051f,
218
2.05M
          0.15854503551839705f,
219
2.05M
          0.3935103426921022f,
220
2.05M
          0.0829816309488214f,
221
2.05M
          -0.45175565899994796f,
222
2.05M
      },
223
2.05M
      {
224
2.05M
          0.0,
225
2.05M
          0.0,
226
2.05M
          -0.304684750724869f,
227
2.05M
          0.5112616136591823f,
228
2.05M
          0.0,
229
2.05M
          0.0,
230
2.05M
          -0.290480129728998f,
231
2.05M
          -0.06578701549142804f,
232
2.05M
          0.304684750724884f,
233
2.05M
          0.2904801297290076f,
234
2.05M
          0.0,
235
2.05M
          -0.23889773523344604f,
236
2.05M
          -0.5112616136592012f,
237
2.05M
          0.06578701549142545f,
238
2.05M
          0.23889773523345467f,
239
2.05M
          0.0,
240
2.05M
      },
241
2.05M
      {
242
2.05M
          0.0,
243
2.05M
          0.0,
244
2.05M
          0.3017929516615495f,
245
2.05M
          0.25792362796341184f,
246
2.05M
          0.0,
247
2.05M
          0.16272340142866204f,
248
2.05M
          0.09520022653475037f,
249
2.05M
          0.0,
250
2.05M
          0.3017929516615503f,
251
2.05M
          0.09520022653475055f,
252
2.05M
          -0.16272340142866173f,
253
2.05M
          -0.35312385449816297f,
254
2.05M
          0.25792362796341295f,
255
2.05M
          0.0,
256
2.05M
          -0.3531238544981624f,
257
2.05M
          -0.6035859033230976f,
258
2.05M
      },
259
2.05M
      {
260
2.05M
          0.0,
261
2.05M
          0.0,
262
2.05M
          0.40824829046386274f,
263
2.05M
          0.0,
264
2.05M
          0.0,
265
2.05M
          0.0,
266
2.05M
          0.0,
267
2.05M
          -0.4082482904638628f,
268
2.05M
          -0.4082482904638635f,
269
2.05M
          0.0,
270
2.05M
          0.0,
271
2.05M
          -0.40824829046386296f,
272
2.05M
          0.0,
273
2.05M
          0.4082482904638634f,
274
2.05M
          0.408248290463863f,
275
2.05M
          0.0,
276
2.05M
      },
277
2.05M
      {
278
2.05M
          0.0,
279
2.05M
          0.0,
280
2.05M
          0.1747866975480809f,
281
2.05M
          0.0812611176717539f,
282
2.05M
          0.0,
283
2.05M
          0.0,
284
2.05M
          -0.3675398009862027f,
285
2.05M
          -0.307882213957909f,
286
2.05M
          -0.17478669754808135f,
287
2.05M
          0.3675398009862011f,
288
2.05M
          0.0,
289
2.05M
          0.4826689115059883f,
290
2.05M
          -0.08126111767175039f,
291
2.05M
          0.30788221395790305f,
292
2.05M
          -0.48266891150598584f,
293
2.05M
          0.0,
294
2.05M
      },
295
2.05M
      {
296
2.05M
          0.0,
297
2.05M
          0.0,
298
2.05M
          -0.21105601049335784f,
299
2.05M
          0.18567180916109802f,
300
2.05M
          0.0,
301
2.05M
          0.0,
302
2.05M
          0.49215859013738733f,
303
2.05M
          -0.38525013709251915f,
304
2.05M
          0.21105601049335806f,
305
2.05M
          -0.49215859013738905f,
306
2.05M
          0.0,
307
2.05M
          0.17419412659916217f,
308
2.05M
          -0.18567180916109904f,
309
2.05M
          0.3852501370925211f,
310
2.05M
          -0.1741941265991621f,
311
2.05M
          0.0,
312
2.05M
      },
313
2.05M
      {
314
2.05M
          0.0,
315
2.05M
          0.0,
316
2.05M
          -0.14266084808807264f,
317
2.05M
          -0.3416446842253372f,
318
2.05M
          0.0,
319
2.05M
          0.7367497537172237f,
320
2.05M
          0.24627107722075148f,
321
2.05M
          -0.08574019035519306f,
322
2.05M
          -0.14266084808807344f,
323
2.05M
          0.24627107722075137f,
324
2.05M
          0.14883399227113567f,
325
2.05M
          -0.04768680350229251f,
326
2.05M
          -0.3416446842253373f,
327
2.05M
          -0.08574019035519267f,
328
2.05M
          -0.047686803502292804f,
329
2.05M
          -0.14266084808807242f,
330
2.05M
      },
331
2.05M
      {
332
2.05M
          0.0,
333
2.05M
          0.0,
334
2.05M
          -0.13813540350758585f,
335
2.05M
          0.3302282550303788f,
336
2.05M
          0.0,
337
2.05M
          0.08755115000587084f,
338
2.05M
          -0.07946706605909573f,
339
2.05M
          -0.4613374887461511f,
340
2.05M
          -0.13813540350758294f,
341
2.05M
          -0.07946706605910261f,
342
2.05M
          0.49724647109535086f,
343
2.05M
          0.12538059448563663f,
344
2.05M
          0.3302282550303805f,
345
2.05M
          -0.4613374887461554f,
346
2.05M
          0.12538059448564315f,
347
2.05M
          -0.13813540350758452f,
348
2.05M
      },
349
2.05M
      {
350
2.05M
          0.0,
351
2.05M
          0.0,
352
2.05M
          -0.17437602599651067f,
353
2.05M
          0.0702790691196284f,
354
2.05M
          0.0,
355
2.05M
          -0.2921026642334881f,
356
2.05M
          0.3623817333531167f,
357
2.05M
          0.0,
358
2.05M
          -0.1743760259965108f,
359
2.05M
          0.36238173335311646f,
360
2.05M
          0.29210266423348785f,
361
2.05M
          -0.4326608024727445f,
362
2.05M
          0.07027906911962818f,
363
2.05M
          0.0,
364
2.05M
          -0.4326608024727457f,
365
2.05M
          0.34875205199302267f,
366
2.05M
      },
367
2.05M
      {
368
2.05M
          0.0,
369
2.05M
          0.0,
370
2.05M
          0.11354987314994337f,
371
2.05M
          -0.07417504595810355f,
372
2.05M
          0.0,
373
2.05M
          0.19402893032594343f,
374
2.05M
          -0.435190496523228f,
375
2.05M
          0.21918684838857466f,
376
2.05M
          0.11354987314994257f,
377
2.05M
          -0.4351904965232251f,
378
2.05M
          0.5550443808910661f,
379
2.05M
          -0.25468277124066463f,
380
2.05M
          -0.07417504595810233f,
381
2.05M
          0.2191868483885728f,
382
2.05M
          -0.25468277124066413f,
383
2.05M
          0.1135498731499429f,
384
2.05M
      },
385
2.05M
  };
386
387
2.05M
  const HWY_CAPPED(float, 16) d;
388
6.17M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
4.11M
    auto pixel = Zero(d);
390
69.9M
    for (size_t j = 0; j < 16; j++) {
391
65.8M
      auto cf = Set(d, coeffs[j]);
392
65.8M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
65.8M
      pixel = MulAdd(cf, basis, pixel);
394
65.8M
    }
395
4.11M
    Store(pixel, d, pixels + i);
396
4.11M
  }
397
2.05M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
34.7M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
34.7M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
34.7M
      {
98
34.7M
          0.25,
99
34.7M
          0.25,
100
34.7M
          0.25,
101
34.7M
          0.25,
102
34.7M
          0.25,
103
34.7M
          0.25,
104
34.7M
          0.25,
105
34.7M
          0.25,
106
34.7M
          0.25,
107
34.7M
          0.25,
108
34.7M
          0.25,
109
34.7M
          0.25,
110
34.7M
          0.25,
111
34.7M
          0.25,
112
34.7M
          0.25,
113
34.7M
          0.25,
114
34.7M
      },
115
34.7M
      {
116
34.7M
          0.876902929799142f,
117
34.7M
          0.2206518106944235f,
118
34.7M
          -0.10140050393753763f,
119
34.7M
          -0.1014005039375375f,
120
34.7M
          0.2206518106944236f,
121
34.7M
          -0.10140050393753777f,
122
34.7M
          -0.10140050393753772f,
123
34.7M
          -0.10140050393753763f,
124
34.7M
          -0.10140050393753758f,
125
34.7M
          -0.10140050393753769f,
126
34.7M
          -0.1014005039375375f,
127
34.7M
          -0.10140050393753768f,
128
34.7M
          -0.10140050393753768f,
129
34.7M
          -0.10140050393753759f,
130
34.7M
          -0.10140050393753763f,
131
34.7M
          -0.10140050393753741f,
132
34.7M
      },
133
34.7M
      {
134
34.7M
          0.0,
135
34.7M
          0.0,
136
34.7M
          0.40670075830260755f,
137
34.7M
          0.44444816619734445f,
138
34.7M
          0.0,
139
34.7M
          0.0,
140
34.7M
          0.19574399372042936f,
141
34.7M
          0.2929100136981264f,
142
34.7M
          -0.40670075830260716f,
143
34.7M
          -0.19574399372042872f,
144
34.7M
          0.0,
145
34.7M
          0.11379074460448091f,
146
34.7M
          -0.44444816619734384f,
147
34.7M
          -0.29291001369812636f,
148
34.7M
          -0.1137907446044814f,
149
34.7M
          0.0,
150
34.7M
      },
151
34.7M
      {
152
34.7M
          0.0,
153
34.7M
          0.0,
154
34.7M
          -0.21255748058288748f,
155
34.7M
          0.3085497062849767f,
156
34.7M
          0.0,
157
34.7M
          0.4706702258572536f,
158
34.7M
          -0.1621205195722993f,
159
34.7M
          0.0,
160
34.7M
          -0.21255748058287047f,
161
34.7M
          -0.16212051957228327f,
162
34.7M
          -0.47067022585725277f,
163
34.7M
          -0.1464291867126764f,
164
34.7M
          0.3085497062849487f,
165
34.7M
          0.0,
166
34.7M
          -0.14642918671266536f,
167
34.7M
          0.4251149611657548f,
168
34.7M
      },
169
34.7M
      {
170
34.7M
          0.0,
171
34.7M
          -0.7071067811865474f,
172
34.7M
          0.0,
173
34.7M
          0.0,
174
34.7M
          0.7071067811865476f,
175
34.7M
          0.0,
176
34.7M
          0.0,
177
34.7M
          0.0,
178
34.7M
          0.0,
179
34.7M
          0.0,
180
34.7M
          0.0,
181
34.7M
          0.0,
182
34.7M
          0.0,
183
34.7M
          0.0,
184
34.7M
          0.0,
185
34.7M
          0.0,
186
34.7M
      },
187
34.7M
      {
188
34.7M
          -0.4105377591765233f,
189
34.7M
          0.6235485373547691f,
190
34.7M
          -0.06435071657946274f,
191
34.7M
          -0.06435071657946266f,
192
34.7M
          0.6235485373547694f,
193
34.7M
          -0.06435071657946284f,
194
34.7M
          -0.0643507165794628f,
195
34.7M
          -0.06435071657946274f,
196
34.7M
          -0.06435071657946272f,
197
34.7M
          -0.06435071657946279f,
198
34.7M
          -0.06435071657946266f,
199
34.7M
          -0.06435071657946277f,
200
34.7M
          -0.06435071657946277f,
201
34.7M
          -0.06435071657946273f,
202
34.7M
          -0.06435071657946274f,
203
34.7M
          -0.0643507165794626f,
204
34.7M
      },
205
34.7M
      {
206
34.7M
          0.0,
207
34.7M
          0.0,
208
34.7M
          -0.4517556589999482f,
209
34.7M
          0.15854503551840063f,
210
34.7M
          0.0,
211
34.7M
          -0.04038515160822202f,
212
34.7M
          0.0074182263792423875f,
213
34.7M
          0.39351034269210167f,
214
34.7M
          -0.45175565899994635f,
215
34.7M
          0.007418226379244351f,
216
34.7M
          0.1107416575309343f,
217
34.7M
          0.08298163094882051f,
218
34.7M
          0.15854503551839705f,
219
34.7M
          0.3935103426921022f,
220
34.7M
          0.0829816309488214f,
221
34.7M
          -0.45175565899994796f,
222
34.7M
      },
223
34.7M
      {
224
34.7M
          0.0,
225
34.7M
          0.0,
226
34.7M
          -0.304684750724869f,
227
34.7M
          0.5112616136591823f,
228
34.7M
          0.0,
229
34.7M
          0.0,
230
34.7M
          -0.290480129728998f,
231
34.7M
          -0.06578701549142804f,
232
34.7M
          0.304684750724884f,
233
34.7M
          0.2904801297290076f,
234
34.7M
          0.0,
235
34.7M
          -0.23889773523344604f,
236
34.7M
          -0.5112616136592012f,
237
34.7M
          0.06578701549142545f,
238
34.7M
          0.23889773523345467f,
239
34.7M
          0.0,
240
34.7M
      },
241
34.7M
      {
242
34.7M
          0.0,
243
34.7M
          0.0,
244
34.7M
          0.3017929516615495f,
245
34.7M
          0.25792362796341184f,
246
34.7M
          0.0,
247
34.7M
          0.16272340142866204f,
248
34.7M
          0.09520022653475037f,
249
34.7M
          0.0,
250
34.7M
          0.3017929516615503f,
251
34.7M
          0.09520022653475055f,
252
34.7M
          -0.16272340142866173f,
253
34.7M
          -0.35312385449816297f,
254
34.7M
          0.25792362796341295f,
255
34.7M
          0.0,
256
34.7M
          -0.3531238544981624f,
257
34.7M
          -0.6035859033230976f,
258
34.7M
      },
259
34.7M
      {
260
34.7M
          0.0,
261
34.7M
          0.0,
262
34.7M
          0.40824829046386274f,
263
34.7M
          0.0,
264
34.7M
          0.0,
265
34.7M
          0.0,
266
34.7M
          0.0,
267
34.7M
          -0.4082482904638628f,
268
34.7M
          -0.4082482904638635f,
269
34.7M
          0.0,
270
34.7M
          0.0,
271
34.7M
          -0.40824829046386296f,
272
34.7M
          0.0,
273
34.7M
          0.4082482904638634f,
274
34.7M
          0.408248290463863f,
275
34.7M
          0.0,
276
34.7M
      },
277
34.7M
      {
278
34.7M
          0.0,
279
34.7M
          0.0,
280
34.7M
          0.1747866975480809f,
281
34.7M
          0.0812611176717539f,
282
34.7M
          0.0,
283
34.7M
          0.0,
284
34.7M
          -0.3675398009862027f,
285
34.7M
          -0.307882213957909f,
286
34.7M
          -0.17478669754808135f,
287
34.7M
          0.3675398009862011f,
288
34.7M
          0.0,
289
34.7M
          0.4826689115059883f,
290
34.7M
          -0.08126111767175039f,
291
34.7M
          0.30788221395790305f,
292
34.7M
          -0.48266891150598584f,
293
34.7M
          0.0,
294
34.7M
      },
295
34.7M
      {
296
34.7M
          0.0,
297
34.7M
          0.0,
298
34.7M
          -0.21105601049335784f,
299
34.7M
          0.18567180916109802f,
300
34.7M
          0.0,
301
34.7M
          0.0,
302
34.7M
          0.49215859013738733f,
303
34.7M
          -0.38525013709251915f,
304
34.7M
          0.21105601049335806f,
305
34.7M
          -0.49215859013738905f,
306
34.7M
          0.0,
307
34.7M
          0.17419412659916217f,
308
34.7M
          -0.18567180916109904f,
309
34.7M
          0.3852501370925211f,
310
34.7M
          -0.1741941265991621f,
311
34.7M
          0.0,
312
34.7M
      },
313
34.7M
      {
314
34.7M
          0.0,
315
34.7M
          0.0,
316
34.7M
          -0.14266084808807264f,
317
34.7M
          -0.3416446842253372f,
318
34.7M
          0.0,
319
34.7M
          0.7367497537172237f,
320
34.7M
          0.24627107722075148f,
321
34.7M
          -0.08574019035519306f,
322
34.7M
          -0.14266084808807344f,
323
34.7M
          0.24627107722075137f,
324
34.7M
          0.14883399227113567f,
325
34.7M
          -0.04768680350229251f,
326
34.7M
          -0.3416446842253373f,
327
34.7M
          -0.08574019035519267f,
328
34.7M
          -0.047686803502292804f,
329
34.7M
          -0.14266084808807242f,
330
34.7M
      },
331
34.7M
      {
332
34.7M
          0.0,
333
34.7M
          0.0,
334
34.7M
          -0.13813540350758585f,
335
34.7M
          0.3302282550303788f,
336
34.7M
          0.0,
337
34.7M
          0.08755115000587084f,
338
34.7M
          -0.07946706605909573f,
339
34.7M
          -0.4613374887461511f,
340
34.7M
          -0.13813540350758294f,
341
34.7M
          -0.07946706605910261f,
342
34.7M
          0.49724647109535086f,
343
34.7M
          0.12538059448563663f,
344
34.7M
          0.3302282550303805f,
345
34.7M
          -0.4613374887461554f,
346
34.7M
          0.12538059448564315f,
347
34.7M
          -0.13813540350758452f,
348
34.7M
      },
349
34.7M
      {
350
34.7M
          0.0,
351
34.7M
          0.0,
352
34.7M
          -0.17437602599651067f,
353
34.7M
          0.0702790691196284f,
354
34.7M
          0.0,
355
34.7M
          -0.2921026642334881f,
356
34.7M
          0.3623817333531167f,
357
34.7M
          0.0,
358
34.7M
          -0.1743760259965108f,
359
34.7M
          0.36238173335311646f,
360
34.7M
          0.29210266423348785f,
361
34.7M
          -0.4326608024727445f,
362
34.7M
          0.07027906911962818f,
363
34.7M
          0.0,
364
34.7M
          -0.4326608024727457f,
365
34.7M
          0.34875205199302267f,
366
34.7M
      },
367
34.7M
      {
368
34.7M
          0.0,
369
34.7M
          0.0,
370
34.7M
          0.11354987314994337f,
371
34.7M
          -0.07417504595810355f,
372
34.7M
          0.0,
373
34.7M
          0.19402893032594343f,
374
34.7M
          -0.435190496523228f,
375
34.7M
          0.21918684838857466f,
376
34.7M
          0.11354987314994257f,
377
34.7M
          -0.4351904965232251f,
378
34.7M
          0.5550443808910661f,
379
34.7M
          -0.25468277124066463f,
380
34.7M
          -0.07417504595810233f,
381
34.7M
          0.2191868483885728f,
382
34.7M
          -0.25468277124066413f,
383
34.7M
          0.1135498731499429f,
384
34.7M
      },
385
34.7M
  };
386
387
34.7M
  const HWY_CAPPED(float, 16) d;
388
104M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
69.5M
    auto pixel = Zero(d);
390
1.18G
    for (size_t j = 0; j < 16; j++) {
391
1.11G
      auto cf = Set(d, coeffs[j]);
392
1.11G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.11G
      pixel = MulAdd(cf, basis, pixel);
394
1.11G
    }
395
69.5M
    Store(pixel, d, pixels + i);
396
69.5M
  }
397
34.7M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
36.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
36.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
36.8M
  size_t afv_x = afv_kind & 1;
404
36.8M
  size_t afv_y = afv_kind / 2;
405
36.8M
  float dcs[3] = {};
406
36.8M
  float block00 = coefficients[0];
407
36.8M
  float block01 = coefficients[1];
408
36.8M
  float block10 = coefficients[8];
409
36.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
36.8M
  dcs[1] = (block00 + block10 - block01);
411
36.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
36.8M
  HWY_ALIGN float coeff[4 * 4];
414
36.8M
  coeff[0] = dcs[0];
415
184M
  for (size_t iy = 0; iy < 4; iy++) {
416
737M
    for (size_t ix = 0; ix < 4; ix++) {
417
589M
      if (ix == 0 && iy == 0) continue;
418
552M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
552M
    }
420
147M
  }
421
36.8M
  HWY_ALIGN float block[4 * 8];
422
36.8M
  AFVIDCT4x4(coeff, block);
423
184M
  for (size_t iy = 0; iy < 4; iy++) {
424
737M
    for (size_t ix = 0; ix < 4; ix++) {
425
589M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
589M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
589M
    }
428
147M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
36.8M
  block[0] = dcs[1];
431
184M
  for (size_t iy = 0; iy < 4; iy++) {
432
737M
    for (size_t ix = 0; ix < 4; ix++) {
433
589M
      if (ix == 0 && iy == 0) continue;
434
552M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
552M
    }
436
147M
  }
437
36.8M
  ComputeScaledIDCT<4, 4>()(
438
36.8M
      block,
439
36.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
36.8M
            pixels_stride),
441
36.8M
      scratch_space);
442
  // IDCT4x8.
443
36.8M
  block[0] = dcs[2];
444
184M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.32G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.17G
      if (ix == 0 && iy == 0) continue;
447
1.14G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.14G
    }
449
147M
  }
450
36.8M
  ComputeScaledIDCT<4, 8>()(
451
36.8M
      block,
452
36.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
36.8M
      scratch_space);
454
36.8M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
536k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
536k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
536k
  size_t afv_x = afv_kind & 1;
404
536k
  size_t afv_y = afv_kind / 2;
405
536k
  float dcs[3] = {};
406
536k
  float block00 = coefficients[0];
407
536k
  float block01 = coefficients[1];
408
536k
  float block10 = coefficients[8];
409
536k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
536k
  dcs[1] = (block00 + block10 - block01);
411
536k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
536k
  HWY_ALIGN float coeff[4 * 4];
414
536k
  coeff[0] = dcs[0];
415
2.68M
  for (size_t iy = 0; iy < 4; iy++) {
416
10.7M
    for (size_t ix = 0; ix < 4; ix++) {
417
8.58M
      if (ix == 0 && iy == 0) continue;
418
8.05M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
8.05M
    }
420
2.14M
  }
421
536k
  HWY_ALIGN float block[4 * 8];
422
536k
  AFVIDCT4x4(coeff, block);
423
2.68M
  for (size_t iy = 0; iy < 4; iy++) {
424
10.7M
    for (size_t ix = 0; ix < 4; ix++) {
425
8.58M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
8.58M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
8.58M
    }
428
2.14M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
536k
  block[0] = dcs[1];
431
2.68M
  for (size_t iy = 0; iy < 4; iy++) {
432
10.7M
    for (size_t ix = 0; ix < 4; ix++) {
433
8.58M
      if (ix == 0 && iy == 0) continue;
434
8.05M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
8.05M
    }
436
2.14M
  }
437
536k
  ComputeScaledIDCT<4, 4>()(
438
536k
      block,
439
536k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
536k
            pixels_stride),
441
536k
      scratch_space);
442
  // IDCT4x8.
443
536k
  block[0] = dcs[2];
444
2.68M
  for (size_t iy = 0; iy < 4; iy++) {
445
19.3M
    for (size_t ix = 0; ix < 8; ix++) {
446
17.1M
      if (ix == 0 && iy == 0) continue;
447
16.6M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
16.6M
    }
449
2.14M
  }
450
536k
  ComputeScaledIDCT<4, 8>()(
451
536k
      block,
452
536k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
536k
      scratch_space);
454
536k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
428k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
428k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
428k
  size_t afv_x = afv_kind & 1;
404
428k
  size_t afv_y = afv_kind / 2;
405
428k
  float dcs[3] = {};
406
428k
  float block00 = coefficients[0];
407
428k
  float block01 = coefficients[1];
408
428k
  float block10 = coefficients[8];
409
428k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
428k
  dcs[1] = (block00 + block10 - block01);
411
428k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
428k
  HWY_ALIGN float coeff[4 * 4];
414
428k
  coeff[0] = dcs[0];
415
2.14M
  for (size_t iy = 0; iy < 4; iy++) {
416
8.57M
    for (size_t ix = 0; ix < 4; ix++) {
417
6.86M
      if (ix == 0 && iy == 0) continue;
418
6.43M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
6.43M
    }
420
1.71M
  }
421
428k
  HWY_ALIGN float block[4 * 8];
422
428k
  AFVIDCT4x4(coeff, block);
423
2.14M
  for (size_t iy = 0; iy < 4; iy++) {
424
8.57M
    for (size_t ix = 0; ix < 4; ix++) {
425
6.86M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
6.86M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
6.86M
    }
428
1.71M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
428k
  block[0] = dcs[1];
431
2.14M
  for (size_t iy = 0; iy < 4; iy++) {
432
8.57M
    for (size_t ix = 0; ix < 4; ix++) {
433
6.86M
      if (ix == 0 && iy == 0) continue;
434
6.43M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
6.43M
    }
436
1.71M
  }
437
428k
  ComputeScaledIDCT<4, 4>()(
438
428k
      block,
439
428k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
428k
            pixels_stride),
441
428k
      scratch_space);
442
  // IDCT4x8.
443
428k
  block[0] = dcs[2];
444
2.14M
  for (size_t iy = 0; iy < 4; iy++) {
445
15.4M
    for (size_t ix = 0; ix < 8; ix++) {
446
13.7M
      if (ix == 0 && iy == 0) continue;
447
13.2M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
13.2M
    }
449
1.71M
  }
450
428k
  ComputeScaledIDCT<4, 8>()(
451
428k
      block,
452
428k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
428k
      scratch_space);
454
428k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
511k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
511k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
511k
  size_t afv_x = afv_kind & 1;
404
511k
  size_t afv_y = afv_kind / 2;
405
511k
  float dcs[3] = {};
406
511k
  float block00 = coefficients[0];
407
511k
  float block01 = coefficients[1];
408
511k
  float block10 = coefficients[8];
409
511k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
511k
  dcs[1] = (block00 + block10 - block01);
411
511k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
511k
  HWY_ALIGN float coeff[4 * 4];
414
511k
  coeff[0] = dcs[0];
415
2.55M
  for (size_t iy = 0; iy < 4; iy++) {
416
10.2M
    for (size_t ix = 0; ix < 4; ix++) {
417
8.18M
      if (ix == 0 && iy == 0) continue;
418
7.66M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
7.66M
    }
420
2.04M
  }
421
511k
  HWY_ALIGN float block[4 * 8];
422
511k
  AFVIDCT4x4(coeff, block);
423
2.55M
  for (size_t iy = 0; iy < 4; iy++) {
424
10.2M
    for (size_t ix = 0; ix < 4; ix++) {
425
8.18M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
8.18M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
8.18M
    }
428
2.04M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
511k
  block[0] = dcs[1];
431
2.55M
  for (size_t iy = 0; iy < 4; iy++) {
432
10.2M
    for (size_t ix = 0; ix < 4; ix++) {
433
8.18M
      if (ix == 0 && iy == 0) continue;
434
7.66M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
7.66M
    }
436
2.04M
  }
437
511k
  ComputeScaledIDCT<4, 4>()(
438
511k
      block,
439
511k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
511k
            pixels_stride),
441
511k
      scratch_space);
442
  // IDCT4x8.
443
511k
  block[0] = dcs[2];
444
2.55M
  for (size_t iy = 0; iy < 4; iy++) {
445
18.4M
    for (size_t ix = 0; ix < 8; ix++) {
446
16.3M
      if (ix == 0 && iy == 0) continue;
447
15.8M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
15.8M
    }
449
2.04M
  }
450
511k
  ComputeScaledIDCT<4, 8>()(
451
511k
      block,
452
511k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
511k
      scratch_space);
454
511k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
581k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
581k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
581k
  size_t afv_x = afv_kind & 1;
404
581k
  size_t afv_y = afv_kind / 2;
405
581k
  float dcs[3] = {};
406
581k
  float block00 = coefficients[0];
407
581k
  float block01 = coefficients[1];
408
581k
  float block10 = coefficients[8];
409
581k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
581k
  dcs[1] = (block00 + block10 - block01);
411
581k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
581k
  HWY_ALIGN float coeff[4 * 4];
414
581k
  coeff[0] = dcs[0];
415
2.90M
  for (size_t iy = 0; iy < 4; iy++) {
416
11.6M
    for (size_t ix = 0; ix < 4; ix++) {
417
9.30M
      if (ix == 0 && iy == 0) continue;
418
8.72M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
8.72M
    }
420
2.32M
  }
421
581k
  HWY_ALIGN float block[4 * 8];
422
581k
  AFVIDCT4x4(coeff, block);
423
2.90M
  for (size_t iy = 0; iy < 4; iy++) {
424
11.6M
    for (size_t ix = 0; ix < 4; ix++) {
425
9.30M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
9.30M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
9.30M
    }
428
2.32M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
581k
  block[0] = dcs[1];
431
2.90M
  for (size_t iy = 0; iy < 4; iy++) {
432
11.6M
    for (size_t ix = 0; ix < 4; ix++) {
433
9.30M
      if (ix == 0 && iy == 0) continue;
434
8.72M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
8.72M
    }
436
2.32M
  }
437
581k
  ComputeScaledIDCT<4, 4>()(
438
581k
      block,
439
581k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
581k
            pixels_stride),
441
581k
      scratch_space);
442
  // IDCT4x8.
443
581k
  block[0] = dcs[2];
444
2.90M
  for (size_t iy = 0; iy < 4; iy++) {
445
20.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
18.6M
      if (ix == 0 && iy == 0) continue;
447
18.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
18.0M
    }
449
2.32M
  }
450
581k
  ComputeScaledIDCT<4, 8>()(
451
581k
      block,
452
581k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
581k
      scratch_space);
454
581k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
8.69M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.69M
  size_t afv_x = afv_kind & 1;
404
8.69M
  size_t afv_y = afv_kind / 2;
405
8.69M
  float dcs[3] = {};
406
8.69M
  float block00 = coefficients[0];
407
8.69M
  float block01 = coefficients[1];
408
8.69M
  float block10 = coefficients[8];
409
8.69M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.69M
  dcs[1] = (block00 + block10 - block01);
411
8.69M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.69M
  HWY_ALIGN float coeff[4 * 4];
414
8.69M
  coeff[0] = dcs[0];
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      if (ix == 0 && iy == 0) continue;
418
130M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
130M
    }
420
34.7M
  }
421
8.69M
  HWY_ALIGN float block[4 * 8];
422
8.69M
  AFVIDCT4x4(coeff, block);
423
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
173M
    for (size_t ix = 0; ix < 4; ix++) {
425
139M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
139M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
139M
    }
428
34.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.69M
  block[0] = dcs[1];
431
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
173M
    for (size_t ix = 0; ix < 4; ix++) {
433
139M
      if (ix == 0 && iy == 0) continue;
434
130M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
130M
    }
436
34.7M
  }
437
8.69M
  ComputeScaledIDCT<4, 4>()(
438
8.69M
      block,
439
8.69M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.69M
            pixels_stride),
441
8.69M
      scratch_space);
442
  // IDCT4x8.
443
8.69M
  block[0] = dcs[2];
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      if (ix == 0 && iy == 0) continue;
447
269M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
269M
    }
449
34.7M
  }
450
8.69M
  ComputeScaledIDCT<4, 8>()(
451
8.69M
      block,
452
8.69M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.69M
      scratch_space);
454
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
8.69M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.69M
  size_t afv_x = afv_kind & 1;
404
8.69M
  size_t afv_y = afv_kind / 2;
405
8.69M
  float dcs[3] = {};
406
8.69M
  float block00 = coefficients[0];
407
8.69M
  float block01 = coefficients[1];
408
8.69M
  float block10 = coefficients[8];
409
8.69M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.69M
  dcs[1] = (block00 + block10 - block01);
411
8.69M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.69M
  HWY_ALIGN float coeff[4 * 4];
414
8.69M
  coeff[0] = dcs[0];
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      if (ix == 0 && iy == 0) continue;
418
130M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
130M
    }
420
34.7M
  }
421
8.69M
  HWY_ALIGN float block[4 * 8];
422
8.69M
  AFVIDCT4x4(coeff, block);
423
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
173M
    for (size_t ix = 0; ix < 4; ix++) {
425
139M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
139M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
139M
    }
428
34.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.69M
  block[0] = dcs[1];
431
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
173M
    for (size_t ix = 0; ix < 4; ix++) {
433
139M
      if (ix == 0 && iy == 0) continue;
434
130M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
130M
    }
436
34.7M
  }
437
8.69M
  ComputeScaledIDCT<4, 4>()(
438
8.69M
      block,
439
8.69M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.69M
            pixels_stride),
441
8.69M
      scratch_space);
442
  // IDCT4x8.
443
8.69M
  block[0] = dcs[2];
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      if (ix == 0 && iy == 0) continue;
447
269M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
269M
    }
449
34.7M
  }
450
8.69M
  ComputeScaledIDCT<4, 8>()(
451
8.69M
      block,
452
8.69M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.69M
      scratch_space);
454
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
8.69M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.69M
  size_t afv_x = afv_kind & 1;
404
8.69M
  size_t afv_y = afv_kind / 2;
405
8.69M
  float dcs[3] = {};
406
8.69M
  float block00 = coefficients[0];
407
8.69M
  float block01 = coefficients[1];
408
8.69M
  float block10 = coefficients[8];
409
8.69M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.69M
  dcs[1] = (block00 + block10 - block01);
411
8.69M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.69M
  HWY_ALIGN float coeff[4 * 4];
414
8.69M
  coeff[0] = dcs[0];
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      if (ix == 0 && iy == 0) continue;
418
130M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
130M
    }
420
34.7M
  }
421
8.69M
  HWY_ALIGN float block[4 * 8];
422
8.69M
  AFVIDCT4x4(coeff, block);
423
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
173M
    for (size_t ix = 0; ix < 4; ix++) {
425
139M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
139M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
139M
    }
428
34.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.69M
  block[0] = dcs[1];
431
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
173M
    for (size_t ix = 0; ix < 4; ix++) {
433
139M
      if (ix == 0 && iy == 0) continue;
434
130M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
130M
    }
436
34.7M
  }
437
8.69M
  ComputeScaledIDCT<4, 4>()(
438
8.69M
      block,
439
8.69M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.69M
            pixels_stride),
441
8.69M
      scratch_space);
442
  // IDCT4x8.
443
8.69M
  block[0] = dcs[2];
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      if (ix == 0 && iy == 0) continue;
447
269M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
269M
    }
449
34.7M
  }
450
8.69M
  ComputeScaledIDCT<4, 8>()(
451
8.69M
      block,
452
8.69M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.69M
      scratch_space);
454
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
8.69M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
8.69M
  size_t afv_x = afv_kind & 1;
404
8.69M
  size_t afv_y = afv_kind / 2;
405
8.69M
  float dcs[3] = {};
406
8.69M
  float block00 = coefficients[0];
407
8.69M
  float block01 = coefficients[1];
408
8.69M
  float block10 = coefficients[8];
409
8.69M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
8.69M
  dcs[1] = (block00 + block10 - block01);
411
8.69M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
8.69M
  HWY_ALIGN float coeff[4 * 4];
414
8.69M
  coeff[0] = dcs[0];
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      if (ix == 0 && iy == 0) continue;
418
130M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
130M
    }
420
34.7M
  }
421
8.69M
  HWY_ALIGN float block[4 * 8];
422
8.69M
  AFVIDCT4x4(coeff, block);
423
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
173M
    for (size_t ix = 0; ix < 4; ix++) {
425
139M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
139M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
139M
    }
428
34.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
8.69M
  block[0] = dcs[1];
431
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
173M
    for (size_t ix = 0; ix < 4; ix++) {
433
139M
      if (ix == 0 && iy == 0) continue;
434
130M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
130M
    }
436
34.7M
  }
437
8.69M
  ComputeScaledIDCT<4, 4>()(
438
8.69M
      block,
439
8.69M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
8.69M
            pixels_stride),
441
8.69M
      scratch_space);
442
  // IDCT4x8.
443
8.69M
  block[0] = dcs[2];
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      if (ix == 0 && iy == 0) continue;
447
269M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
269M
    }
449
34.7M
  }
450
8.69M
  ComputeScaledIDCT<4, 8>()(
451
8.69M
      block,
452
8.69M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
8.69M
      scratch_space);
454
8.69M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
124M
                                        float* scratch_space) {
461
124M
  using Type = AcStrategyType;
462
124M
  switch (strategy) {
463
11.0M
    case Type::IDENTITY: {
464
11.0M
      float dcs[4] = {};
465
11.0M
      float block00 = coefficients[0];
466
11.0M
      float block01 = coefficients[1];
467
11.0M
      float block10 = coefficients[8];
468
11.0M
      float block11 = coefficients[9];
469
11.0M
      dcs[0] = block00 + block01 + block10 + block11;
470
11.0M
      dcs[1] = block00 + block01 - block10 - block11;
471
11.0M
      dcs[2] = block00 - block01 + block10 - block11;
472
11.0M
      dcs[3] = block00 - block01 - block10 + block11;
473
33.1M
      for (size_t y = 0; y < 2; y++) {
474
66.3M
        for (size_t x = 0; x < 2; x++) {
475
44.2M
          float block_dc = dcs[y * 2 + x];
476
44.2M
          float residual_sum = 0;
477
221M
          for (size_t iy = 0; iy < 4; iy++) {
478
885M
            for (size_t ix = 0; ix < 4; ix++) {
479
708M
              if (ix == 0 && iy == 0) continue;
480
663M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
663M
            }
482
177M
          }
483
44.2M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
44.2M
              block_dc - residual_sum * (1.0f / 16);
485
221M
          for (size_t iy = 0; iy < 4; iy++) {
486
885M
            for (size_t ix = 0; ix < 4; ix++) {
487
708M
              if (ix == 1 && iy == 1) continue;
488
663M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
663M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
663M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
663M
            }
492
177M
          }
493
44.2M
          pixels[y * 4 * pixels_stride + x * 4] =
494
44.2M
              coefficients[(y + 2) * 8 + x + 2] +
495
44.2M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
44.2M
        }
497
22.1M
      }
498
11.0M
      break;
499
0
    }
500
9.42M
    case Type::DCT8X4: {
501
9.42M
      float dcs[2] = {};
502
9.42M
      float block0 = coefficients[0];
503
9.42M
      float block1 = coefficients[8];
504
9.42M
      dcs[0] = block0 + block1;
505
9.42M
      dcs[1] = block0 - block1;
506
28.2M
      for (size_t x = 0; x < 2; x++) {
507
18.8M
        HWY_ALIGN float block[4 * 8];
508
18.8M
        block[0] = dcs[x];
509
94.2M
        for (size_t iy = 0; iy < 4; iy++) {
510
678M
          for (size_t ix = 0; ix < 8; ix++) {
511
603M
            if (ix == 0 && iy == 0) continue;
512
584M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
584M
          }
514
75.4M
        }
515
18.8M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
18.8M
                                  scratch_space);
517
18.8M
      }
518
9.42M
      break;
519
0
    }
520
9.03M
    case Type::DCT4X8: {
521
9.03M
      float dcs[2] = {};
522
9.03M
      float block0 = coefficients[0];
523
9.03M
      float block1 = coefficients[8];
524
9.03M
      dcs[0] = block0 + block1;
525
9.03M
      dcs[1] = block0 - block1;
526
27.1M
      for (size_t y = 0; y < 2; y++) {
527
18.0M
        HWY_ALIGN float block[4 * 8];
528
18.0M
        block[0] = dcs[y];
529
90.3M
        for (size_t iy = 0; iy < 4; iy++) {
530
650M
          for (size_t ix = 0; ix < 8; ix++) {
531
578M
            if (ix == 0 && iy == 0) continue;
532
560M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
560M
          }
534
72.2M
        }
535
18.0M
        ComputeScaledIDCT<4, 8>()(
536
18.0M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
18.0M
            scratch_space);
538
18.0M
      }
539
9.03M
      break;
540
0
    }
541
8.70M
    case Type::DCT4X4: {
542
8.70M
      float dcs[4] = {};
543
8.70M
      float block00 = coefficients[0];
544
8.70M
      float block01 = coefficients[1];
545
8.70M
      float block10 = coefficients[8];
546
8.70M
      float block11 = coefficients[9];
547
8.70M
      dcs[0] = block00 + block01 + block10 + block11;
548
8.70M
      dcs[1] = block00 + block01 - block10 - block11;
549
8.70M
      dcs[2] = block00 - block01 + block10 - block11;
550
8.70M
      dcs[3] = block00 - block01 - block10 + block11;
551
26.1M
      for (size_t y = 0; y < 2; y++) {
552
52.2M
        for (size_t x = 0; x < 2; x++) {
553
34.8M
          HWY_ALIGN float block[4 * 4];
554
34.8M
          block[0] = dcs[y * 2 + x];
555
174M
          for (size_t iy = 0; iy < 4; iy++) {
556
696M
            for (size_t ix = 0; ix < 4; ix++) {
557
556M
              if (ix == 0 && iy == 0) continue;
558
522M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
522M
            }
560
139M
          }
561
34.8M
          ComputeScaledIDCT<4, 4>()(
562
34.8M
              block,
563
34.8M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
34.8M
              scratch_space);
565
34.8M
        }
566
17.4M
      }
567
8.70M
      break;
568
0
    }
569
11.1M
    case Type::DCT2X2: {
570
11.1M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
11.1M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
11.1M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
11.1M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
11.1M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
100M
      for (size_t y = 0; y < kBlockDim; y++) {
576
804M
        for (size_t x = 0; x < kBlockDim; x++) {
577
715M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
715M
        }
579
89.4M
      }
580
11.1M
      break;
581
0
    }
582
4.12M
    case Type::DCT16X16: {
583
4.12M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.12M
                                  scratch_space);
585
4.12M
      break;
586
0
    }
587
7.86M
    case Type::DCT16X8: {
588
7.86M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
7.86M
                                 scratch_space);
590
7.86M
      break;
591
0
    }
592
8.01M
    case Type::DCT8X16: {
593
8.01M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
8.01M
                                 scratch_space);
595
8.01M
      break;
596
0
    }
597
54
    case Type::DCT32X8: {
598
54
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
54
                                 scratch_space);
600
54
      break;
601
0
    }
602
9
    case Type::DCT8X32: {
603
9
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
9
                                 scratch_space);
605
9
      break;
606
0
    }
607
1.57M
    case Type::DCT32X16: {
608
1.57M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.57M
                                  scratch_space);
610
1.57M
      break;
611
0
    }
612
1.66M
    case Type::DCT16X32: {
613
1.66M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.66M
                                  scratch_space);
615
1.66M
      break;
616
0
    }
617
867k
    case Type::DCT32X32: {
618
867k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
867k
                                  scratch_space);
620
867k
      break;
621
0
    }
622
12.9M
    case Type::DCT: {
623
12.9M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
12.9M
                                scratch_space);
625
12.9M
      break;
626
0
    }
627
9.23M
    case Type::AFV0: {
628
9.23M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
9.23M
      break;
630
0
    }
631
9.12M
    case Type::AFV1: {
632
9.12M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
9.12M
      break;
634
0
    }
635
9.21M
    case Type::AFV2: {
636
9.21M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
9.21M
      break;
638
0
    }
639
9.28M
    case Type::AFV3: {
640
9.28M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
9.28M
      break;
642
0
    }
643
466k
    case Type::DCT64X32: {
644
466k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
466k
                                  scratch_space);
646
466k
      break;
647
0
    }
648
330k
    case Type::DCT32X64: {
649
330k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
330k
                                  scratch_space);
651
330k
      break;
652
0
    }
653
180k
    case Type::DCT64X64: {
654
180k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
180k
                                  scratch_space);
656
180k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
124M
  }
689
124M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
14.7M
                                        float* scratch_space) {
461
14.7M
  using Type = AcStrategyType;
462
14.7M
  switch (strategy) {
463
2.36M
    case Type::IDENTITY: {
464
2.36M
      float dcs[4] = {};
465
2.36M
      float block00 = coefficients[0];
466
2.36M
      float block01 = coefficients[1];
467
2.36M
      float block10 = coefficients[8];
468
2.36M
      float block11 = coefficients[9];
469
2.36M
      dcs[0] = block00 + block01 + block10 + block11;
470
2.36M
      dcs[1] = block00 + block01 - block10 - block11;
471
2.36M
      dcs[2] = block00 - block01 + block10 - block11;
472
2.36M
      dcs[3] = block00 - block01 - block10 + block11;
473
7.09M
      for (size_t y = 0; y < 2; y++) {
474
14.1M
        for (size_t x = 0; x < 2; x++) {
475
9.45M
          float block_dc = dcs[y * 2 + x];
476
9.45M
          float residual_sum = 0;
477
47.2M
          for (size_t iy = 0; iy < 4; iy++) {
478
189M
            for (size_t ix = 0; ix < 4; ix++) {
479
151M
              if (ix == 0 && iy == 0) continue;
480
141M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
141M
            }
482
37.8M
          }
483
9.45M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
9.45M
              block_dc - residual_sum * (1.0f / 16);
485
47.2M
          for (size_t iy = 0; iy < 4; iy++) {
486
189M
            for (size_t ix = 0; ix < 4; ix++) {
487
151M
              if (ix == 1 && iy == 1) continue;
488
141M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
141M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
141M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
141M
            }
492
37.8M
          }
493
9.45M
          pixels[y * 4 * pixels_stride + x * 4] =
494
9.45M
              coefficients[(y + 2) * 8 + x + 2] +
495
9.45M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
9.45M
        }
497
4.72M
      }
498
2.36M
      break;
499
0
    }
500
729k
    case Type::DCT8X4: {
501
729k
      float dcs[2] = {};
502
729k
      float block0 = coefficients[0];
503
729k
      float block1 = coefficients[8];
504
729k
      dcs[0] = block0 + block1;
505
729k
      dcs[1] = block0 - block1;
506
2.18M
      for (size_t x = 0; x < 2; x++) {
507
1.45M
        HWY_ALIGN float block[4 * 8];
508
1.45M
        block[0] = dcs[x];
509
7.29M
        for (size_t iy = 0; iy < 4; iy++) {
510
52.5M
          for (size_t ix = 0; ix < 8; ix++) {
511
46.7M
            if (ix == 0 && iy == 0) continue;
512
45.2M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
45.2M
          }
514
5.83M
        }
515
1.45M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
1.45M
                                  scratch_space);
517
1.45M
      }
518
729k
      break;
519
0
    }
520
335k
    case Type::DCT4X8: {
521
335k
      float dcs[2] = {};
522
335k
      float block0 = coefficients[0];
523
335k
      float block1 = coefficients[8];
524
335k
      dcs[0] = block0 + block1;
525
335k
      dcs[1] = block0 - block1;
526
1.00M
      for (size_t y = 0; y < 2; y++) {
527
671k
        HWY_ALIGN float block[4 * 8];
528
671k
        block[0] = dcs[y];
529
3.35M
        for (size_t iy = 0; iy < 4; iy++) {
530
24.1M
          for (size_t ix = 0; ix < 8; ix++) {
531
21.4M
            if (ix == 0 && iy == 0) continue;
532
20.8M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
20.8M
          }
534
2.68M
        }
535
671k
        ComputeScaledIDCT<4, 8>()(
536
671k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
671k
            scratch_space);
538
671k
      }
539
335k
      break;
540
0
    }
541
3.39k
    case Type::DCT4X4: {
542
3.39k
      float dcs[4] = {};
543
3.39k
      float block00 = coefficients[0];
544
3.39k
      float block01 = coefficients[1];
545
3.39k
      float block10 = coefficients[8];
546
3.39k
      float block11 = coefficients[9];
547
3.39k
      dcs[0] = block00 + block01 + block10 + block11;
548
3.39k
      dcs[1] = block00 + block01 - block10 - block11;
549
3.39k
      dcs[2] = block00 - block01 + block10 - block11;
550
3.39k
      dcs[3] = block00 - block01 - block10 + block11;
551
10.1k
      for (size_t y = 0; y < 2; y++) {
552
20.3k
        for (size_t x = 0; x < 2; x++) {
553
13.5k
          HWY_ALIGN float block[4 * 4];
554
13.5k
          block[0] = dcs[y * 2 + x];
555
67.9k
          for (size_t iy = 0; iy < 4; iy++) {
556
271k
            for (size_t ix = 0; ix < 4; ix++) {
557
217k
              if (ix == 0 && iy == 0) continue;
558
203k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
203k
            }
560
54.3k
          }
561
13.5k
          ComputeScaledIDCT<4, 4>()(
562
13.5k
              block,
563
13.5k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
13.5k
              scratch_space);
565
13.5k
        }
566
6.79k
      }
567
3.39k
      break;
568
0
    }
569
2.48M
    case Type::DCT2X2: {
570
2.48M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
2.48M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
2.48M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
2.48M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
2.48M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
22.3M
      for (size_t y = 0; y < kBlockDim; y++) {
576
178M
        for (size_t x = 0; x < kBlockDim; x++) {
577
158M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
158M
        }
579
19.8M
      }
580
2.48M
      break;
581
0
    }
582
437k
    case Type::DCT16X16: {
583
437k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
437k
                                  scratch_space);
585
437k
      break;
586
0
    }
587
708k
    case Type::DCT16X8: {
588
708k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
708k
                                 scratch_space);
590
708k
      break;
591
0
    }
592
839k
    case Type::DCT8X16: {
593
839k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
839k
                                 scratch_space);
595
839k
      break;
596
0
    }
597
54
    case Type::DCT32X8: {
598
54
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
54
                                 scratch_space);
600
54
      break;
601
0
    }
602
9
    case Type::DCT8X32: {
603
9
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
9
                                 scratch_space);
605
9
      break;
606
0
    }
607
123k
    case Type::DCT32X16: {
608
123k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
123k
                                  scratch_space);
610
123k
      break;
611
0
    }
612
210k
    case Type::DCT16X32: {
613
210k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
210k
                                  scratch_space);
615
210k
      break;
616
0
    }
617
116k
    case Type::DCT32X32: {
618
116k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
116k
                                  scratch_space);
620
116k
      break;
621
0
    }
622
4.21M
    case Type::DCT: {
623
4.21M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
4.21M
                                scratch_space);
625
4.21M
      break;
626
0
    }
627
536k
    case Type::AFV0: {
628
536k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
536k
      break;
630
0
    }
631
428k
    case Type::AFV1: {
632
428k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
428k
      break;
634
0
    }
635
511k
    case Type::AFV2: {
636
511k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
511k
      break;
638
0
    }
639
581k
    case Type::AFV3: {
640
581k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
581k
      break;
642
0
    }
643
14.3k
    case Type::DCT64X32: {
644
14.3k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
14.3k
                                  scratch_space);
646
14.3k
      break;
647
0
    }
648
22.0k
    case Type::DCT32X64: {
649
22.0k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
22.0k
                                  scratch_space);
651
22.0k
      break;
652
0
    }
653
58.7k
    case Type::DCT64X64: {
654
58.7k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
58.7k
                                  scratch_space);
656
58.7k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
14.7M
  }
689
14.7M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
109M
                                        float* scratch_space) {
461
109M
  using Type = AcStrategyType;
462
109M
  switch (strategy) {
463
8.69M
    case Type::IDENTITY: {
464
8.69M
      float dcs[4] = {};
465
8.69M
      float block00 = coefficients[0];
466
8.69M
      float block01 = coefficients[1];
467
8.69M
      float block10 = coefficients[8];
468
8.69M
      float block11 = coefficients[9];
469
8.69M
      dcs[0] = block00 + block01 + block10 + block11;
470
8.69M
      dcs[1] = block00 + block01 - block10 - block11;
471
8.69M
      dcs[2] = block00 - block01 + block10 - block11;
472
8.69M
      dcs[3] = block00 - block01 - block10 + block11;
473
26.0M
      for (size_t y = 0; y < 2; y++) {
474
52.1M
        for (size_t x = 0; x < 2; x++) {
475
34.7M
          float block_dc = dcs[y * 2 + x];
476
34.7M
          float residual_sum = 0;
477
173M
          for (size_t iy = 0; iy < 4; iy++) {
478
695M
            for (size_t ix = 0; ix < 4; ix++) {
479
556M
              if (ix == 0 && iy == 0) continue;
480
521M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
521M
            }
482
139M
          }
483
34.7M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
34.7M
              block_dc - residual_sum * (1.0f / 16);
485
173M
          for (size_t iy = 0; iy < 4; iy++) {
486
695M
            for (size_t ix = 0; ix < 4; ix++) {
487
556M
              if (ix == 1 && iy == 1) continue;
488
521M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
521M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
521M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
521M
            }
492
139M
          }
493
34.7M
          pixels[y * 4 * pixels_stride + x * 4] =
494
34.7M
              coefficients[(y + 2) * 8 + x + 2] +
495
34.7M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
34.7M
        }
497
17.3M
      }
498
8.69M
      break;
499
0
    }
500
8.69M
    case Type::DCT8X4: {
501
8.69M
      float dcs[2] = {};
502
8.69M
      float block0 = coefficients[0];
503
8.69M
      float block1 = coefficients[8];
504
8.69M
      dcs[0] = block0 + block1;
505
8.69M
      dcs[1] = block0 - block1;
506
26.0M
      for (size_t x = 0; x < 2; x++) {
507
17.3M
        HWY_ALIGN float block[4 * 8];
508
17.3M
        block[0] = dcs[x];
509
86.9M
        for (size_t iy = 0; iy < 4; iy++) {
510
626M
          for (size_t ix = 0; ix < 8; ix++) {
511
556M
            if (ix == 0 && iy == 0) continue;
512
539M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
539M
          }
514
69.5M
        }
515
17.3M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
17.3M
                                  scratch_space);
517
17.3M
      }
518
8.69M
      break;
519
0
    }
520
8.69M
    case Type::DCT4X8: {
521
8.69M
      float dcs[2] = {};
522
8.69M
      float block0 = coefficients[0];
523
8.69M
      float block1 = coefficients[8];
524
8.69M
      dcs[0] = block0 + block1;
525
8.69M
      dcs[1] = block0 - block1;
526
26.0M
      for (size_t y = 0; y < 2; y++) {
527
17.3M
        HWY_ALIGN float block[4 * 8];
528
17.3M
        block[0] = dcs[y];
529
86.9M
        for (size_t iy = 0; iy < 4; iy++) {
530
626M
          for (size_t ix = 0; ix < 8; ix++) {
531
556M
            if (ix == 0 && iy == 0) continue;
532
539M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
539M
          }
534
69.5M
        }
535
17.3M
        ComputeScaledIDCT<4, 8>()(
536
17.3M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
17.3M
            scratch_space);
538
17.3M
      }
539
8.69M
      break;
540
0
    }
541
8.69M
    case Type::DCT4X4: {
542
8.69M
      float dcs[4] = {};
543
8.69M
      float block00 = coefficients[0];
544
8.69M
      float block01 = coefficients[1];
545
8.69M
      float block10 = coefficients[8];
546
8.69M
      float block11 = coefficients[9];
547
8.69M
      dcs[0] = block00 + block01 + block10 + block11;
548
8.69M
      dcs[1] = block00 + block01 - block10 - block11;
549
8.69M
      dcs[2] = block00 - block01 + block10 - block11;
550
8.69M
      dcs[3] = block00 - block01 - block10 + block11;
551
26.0M
      for (size_t y = 0; y < 2; y++) {
552
52.1M
        for (size_t x = 0; x < 2; x++) {
553
34.7M
          HWY_ALIGN float block[4 * 4];
554
34.7M
          block[0] = dcs[y * 2 + x];
555
173M
          for (size_t iy = 0; iy < 4; iy++) {
556
695M
            for (size_t ix = 0; ix < 4; ix++) {
557
556M
              if (ix == 0 && iy == 0) continue;
558
521M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
521M
            }
560
139M
          }
561
34.7M
          ComputeScaledIDCT<4, 4>()(
562
34.7M
              block,
563
34.7M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
34.7M
              scratch_space);
565
34.7M
        }
566
17.3M
      }
567
8.69M
      break;
568
0
    }
569
8.69M
    case Type::DCT2X2: {
570
8.69M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
8.69M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
8.69M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
8.69M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
8.69M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
78.2M
      for (size_t y = 0; y < kBlockDim; y++) {
576
626M
        for (size_t x = 0; x < kBlockDim; x++) {
577
556M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
556M
        }
579
69.5M
      }
580
8.69M
      break;
581
0
    }
582
3.68M
    case Type::DCT16X16: {
583
3.68M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
3.68M
                                  scratch_space);
585
3.68M
      break;
586
0
    }
587
7.15M
    case Type::DCT16X8: {
588
7.15M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
7.15M
                                 scratch_space);
590
7.15M
      break;
591
0
    }
592
7.17M
    case Type::DCT8X16: {
593
7.17M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
7.17M
                                 scratch_space);
595
7.17M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
1.45M
    case Type::DCT32X16: {
608
1.45M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.45M
                                  scratch_space);
610
1.45M
      break;
611
0
    }
612
1.45M
    case Type::DCT16X32: {
613
1.45M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.45M
                                  scratch_space);
615
1.45M
      break;
616
0
    }
617
750k
    case Type::DCT32X32: {
618
750k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
750k
                                  scratch_space);
620
750k
      break;
621
0
    }
622
8.69M
    case Type::DCT: {
623
8.69M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
8.69M
                                scratch_space);
625
8.69M
      break;
626
0
    }
627
8.69M
    case Type::AFV0: {
628
8.69M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
8.69M
      break;
630
0
    }
631
8.69M
    case Type::AFV1: {
632
8.69M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
8.69M
      break;
634
0
    }
635
8.69M
    case Type::AFV2: {
636
8.69M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
8.69M
      break;
638
0
    }
639
8.69M
    case Type::AFV3: {
640
8.69M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
8.69M
      break;
642
0
    }
643
452k
    case Type::DCT64X32: {
644
452k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
452k
                                  scratch_space);
646
452k
      break;
647
0
    }
648
308k
    case Type::DCT32X64: {
649
308k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
308k
                                  scratch_space);
651
308k
      break;
652
0
    }
653
121k
    case Type::DCT64X64: {
654
121k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
121k
                                  scratch_space);
656
121k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
109M
  }
689
109M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
14.7M
                                              float* JXL_RESTRICT scratch) {
695
14.7M
  using Type = AcStrategyType;
696
14.7M
  HWY_ALIGN float warm_block[4 * 4];
697
14.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
14.7M
  switch (strategy) {
699
708k
    case Type::DCT16X8: {
700
708k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
708k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
708k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
708k
      break;
704
0
    }
705
839k
    case Type::DCT8X16: {
706
839k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
839k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
839k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
839k
      break;
710
0
    }
711
437k
    case Type::DCT16X16: {
712
437k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
437k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
437k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
437k
      break;
716
0
    }
717
54
    case Type::DCT32X8: {
718
54
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
54
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
54
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
54
      break;
722
0
    }
723
9
    case Type::DCT8X32: {
724
9
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
9
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
9
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
9
      break;
728
0
    }
729
123k
    case Type::DCT32X16: {
730
123k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
123k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
123k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
123k
      break;
734
0
    }
735
210k
    case Type::DCT16X32: {
736
210k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
210k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
210k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
210k
      break;
740
0
    }
741
116k
    case Type::DCT32X32: {
742
116k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
116k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
116k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
116k
      break;
746
0
    }
747
14.3k
    case Type::DCT64X32: {
748
14.3k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
14.3k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
14.3k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
14.3k
      break;
752
0
    }
753
22.0k
    case Type::DCT32X64: {
754
22.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
22.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
22.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
22.0k
      break;
758
0
    }
759
58.7k
    case Type::DCT64X64: {
760
58.7k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
58.7k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
58.7k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
58.7k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.21M
    case Type::DCT:
806
6.69M
    case Type::DCT2X2:
807
6.69M
    case Type::DCT4X4:
808
7.03M
    case Type::DCT4X8:
809
7.76M
    case Type::DCT8X4:
810
8.29M
    case Type::AFV0:
811
8.72M
    case Type::AFV1:
812
9.23M
    case Type::AFV2:
813
9.82M
    case Type::AFV3:
814
12.1M
    case Type::IDENTITY:
815
12.1M
      llf[0] = dc[0];
816
12.1M
      break;
817
14.7M
  };
818
14.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
14.7M
                                              float* JXL_RESTRICT scratch) {
695
14.7M
  using Type = AcStrategyType;
696
14.7M
  HWY_ALIGN float warm_block[4 * 4];
697
14.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
14.7M
  switch (strategy) {
699
708k
    case Type::DCT16X8: {
700
708k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
708k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
708k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
708k
      break;
704
0
    }
705
839k
    case Type::DCT8X16: {
706
839k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
839k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
839k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
839k
      break;
710
0
    }
711
437k
    case Type::DCT16X16: {
712
437k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
437k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
437k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
437k
      break;
716
0
    }
717
54
    case Type::DCT32X8: {
718
54
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
54
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
54
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
54
      break;
722
0
    }
723
9
    case Type::DCT8X32: {
724
9
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
9
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
9
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
9
      break;
728
0
    }
729
123k
    case Type::DCT32X16: {
730
123k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
123k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
123k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
123k
      break;
734
0
    }
735
210k
    case Type::DCT16X32: {
736
210k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
210k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
210k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
210k
      break;
740
0
    }
741
116k
    case Type::DCT32X32: {
742
116k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
116k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
116k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
116k
      break;
746
0
    }
747
14.3k
    case Type::DCT64X32: {
748
14.3k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
14.3k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
14.3k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
14.3k
      break;
752
0
    }
753
22.0k
    case Type::DCT32X64: {
754
22.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
22.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
22.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
22.0k
      break;
758
0
    }
759
58.7k
    case Type::DCT64X64: {
760
58.7k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
58.7k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
58.7k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
58.7k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.21M
    case Type::DCT:
806
6.69M
    case Type::DCT2X2:
807
6.69M
    case Type::DCT4X4:
808
7.03M
    case Type::DCT4X8:
809
7.76M
    case Type::DCT8X4:
810
8.29M
    case Type::AFV0:
811
8.72M
    case Type::AFV1:
812
9.23M
    case Type::AFV2:
813
9.82M
    case Type::AFV3:
814
12.1M
    case Type::IDENTITY:
815
12.1M
      llf[0] = dc[0];
816
12.1M
      break;
817
14.7M
  };
818
14.7M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_