Coverage Report

Created: 2025-12-03 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
4.29M
                                  float* JXL_RESTRICT scratch_space) {
41
4.29M
  static_assert(LF_ROWS == ROWS,
42
4.29M
                "ReinterpretingDCT should only be called with LF == N");
43
4.29M
  static_assert(LF_COLS == COLS,
44
4.29M
                "ReinterpretingDCT should only be called with LF == N");
45
4.29M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
4.29M
                                 scratch_space);
47
4.29M
  if (ROWS < COLS) {
48
4.19M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
9.38M
      for (size_t x = 0; x < LF_COLS; x++) {
50
7.03M
        output[y * output_stride + x] =
51
7.03M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
7.03M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
7.03M
      }
54
2.34M
    }
55
2.44M
  } else {
56
8.07M
    for (size_t y = 0; y < LF_COLS; y++) {
57
27.6M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
22.0M
        output[y * output_stride + x] =
59
22.0M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
22.0M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
22.0M
      }
62
5.63M
    }
63
2.44M
  }
64
4.29M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
972k
                                  float* JXL_RESTRICT scratch_space) {
41
972k
  static_assert(LF_ROWS == ROWS,
42
972k
                "ReinterpretingDCT should only be called with LF == N");
43
972k
  static_assert(LF_COLS == COLS,
44
972k
                "ReinterpretingDCT should only be called with LF == N");
45
972k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
972k
                                 scratch_space);
47
972k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
972k
  } else {
56
1.94M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.91M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.94M
        output[y * output_stride + x] =
59
1.94M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.94M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.94M
      }
62
972k
    }
63
972k
  }
64
972k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
1.41M
                                  float* JXL_RESTRICT scratch_space) {
41
1.41M
  static_assert(LF_ROWS == ROWS,
42
1.41M
                "ReinterpretingDCT should only be called with LF == N");
43
1.41M
  static_assert(LF_COLS == COLS,
44
1.41M
                "ReinterpretingDCT should only be called with LF == N");
45
1.41M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
1.41M
                                 scratch_space);
47
1.41M
  if (ROWS < COLS) {
48
2.83M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.24M
      for (size_t x = 0; x < LF_COLS; x++) {
50
2.83M
        output[y * output_stride + x] =
51
2.83M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
2.83M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
2.83M
      }
54
1.41M
    }
55
1.41M
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
1.41M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
551k
                                  float* JXL_RESTRICT scratch_space) {
41
551k
  static_assert(LF_ROWS == ROWS,
42
551k
                "ReinterpretingDCT should only be called with LF == N");
43
551k
  static_assert(LF_COLS == COLS,
44
551k
                "ReinterpretingDCT should only be called with LF == N");
45
551k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
551k
                                 scratch_space);
47
551k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
551k
  } else {
56
1.65M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.30M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.20M
        output[y * output_stride + x] =
59
2.20M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.20M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.20M
      }
62
1.10M
    }
63
551k
  }
64
551k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6.36k
                                  float* JXL_RESTRICT scratch_space) {
41
6.36k
  static_assert(LF_ROWS == ROWS,
42
6.36k
                "ReinterpretingDCT should only be called with LF == N");
43
6.36k
  static_assert(LF_COLS == COLS,
44
6.36k
                "ReinterpretingDCT should only be called with LF == N");
45
6.36k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6.36k
                                 scratch_space);
47
6.36k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
6.36k
  } else {
56
12.7k
    for (size_t y = 0; y < LF_COLS; y++) {
57
31.8k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
25.4k
        output[y * output_stride + x] =
59
25.4k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
25.4k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
25.4k
      }
62
6.36k
    }
63
6.36k
  }
64
6.36k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
7.75k
                                  float* JXL_RESTRICT scratch_space) {
41
7.75k
  static_assert(LF_ROWS == ROWS,
42
7.75k
                "ReinterpretingDCT should only be called with LF == N");
43
7.75k
  static_assert(LF_COLS == COLS,
44
7.75k
                "ReinterpretingDCT should only be called with LF == N");
45
7.75k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
7.75k
                                 scratch_space);
47
7.75k
  if (ROWS < COLS) {
48
15.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
38.7k
      for (size_t x = 0; x < LF_COLS; x++) {
50
31.0k
        output[y * output_stride + x] =
51
31.0k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
31.0k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
31.0k
      }
54
7.75k
    }
55
7.75k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
7.75k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
258k
                                  float* JXL_RESTRICT scratch_space) {
41
258k
  static_assert(LF_ROWS == ROWS,
42
258k
                "ReinterpretingDCT should only be called with LF == N");
43
258k
  static_assert(LF_COLS == COLS,
44
258k
                "ReinterpretingDCT should only be called with LF == N");
45
258k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
258k
                                 scratch_space);
47
258k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
258k
  } else {
56
774k
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.58M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.06M
        output[y * output_stride + x] =
59
2.06M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.06M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.06M
      }
62
516k
    }
63
258k
  }
64
258k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
399k
                                  float* JXL_RESTRICT scratch_space) {
41
399k
  static_assert(LF_ROWS == ROWS,
42
399k
                "ReinterpretingDCT should only be called with LF == N");
43
399k
  static_assert(LF_COLS == COLS,
44
399k
                "ReinterpretingDCT should only be called with LF == N");
45
399k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
399k
                                 scratch_space);
47
399k
  if (ROWS < COLS) {
48
1.19M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
3.99M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.19M
        output[y * output_stride + x] =
51
3.19M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.19M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.19M
      }
54
799k
    }
55
399k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
399k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
531k
                                  float* JXL_RESTRICT scratch_space) {
41
531k
  static_assert(LF_ROWS == ROWS,
42
531k
                "ReinterpretingDCT should only be called with LF == N");
43
531k
  static_assert(LF_COLS == COLS,
44
531k
                "ReinterpretingDCT should only be called with LF == N");
45
531k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
531k
                                 scratch_space);
47
531k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
531k
  } else {
56
2.65M
    for (size_t y = 0; y < LF_COLS; y++) {
57
10.6M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
8.50M
        output[y * output_stride + x] =
59
8.50M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
8.50M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
8.50M
      }
62
2.12M
    }
63
531k
  }
64
531k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
18.1k
                                  float* JXL_RESTRICT scratch_space) {
41
18.1k
  static_assert(LF_ROWS == ROWS,
42
18.1k
                "ReinterpretingDCT should only be called with LF == N");
43
18.1k
  static_assert(LF_COLS == COLS,
44
18.1k
                "ReinterpretingDCT should only be called with LF == N");
45
18.1k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
18.1k
                                 scratch_space);
47
18.1k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
18.1k
  } else {
56
90.9k
    for (size_t y = 0; y < LF_COLS; y++) {
57
654k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
582k
        output[y * output_stride + x] =
59
582k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
582k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
582k
      }
62
72.7k
    }
63
18.1k
  }
64
18.1k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
30.4k
                                  float* JXL_RESTRICT scratch_space) {
41
30.4k
  static_assert(LF_ROWS == ROWS,
42
30.4k
                "ReinterpretingDCT should only be called with LF == N");
43
30.4k
  static_assert(LF_COLS == COLS,
44
30.4k
                "ReinterpretingDCT should only be called with LF == N");
45
30.4k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
30.4k
                                 scratch_space);
47
30.4k
  if (ROWS < COLS) {
48
152k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.09M
      for (size_t x = 0; x < LF_COLS; x++) {
50
973k
        output[y * output_stride + x] =
51
973k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
973k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
973k
      }
54
121k
    }
55
30.4k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
30.4k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
104k
                                  float* JXL_RESTRICT scratch_space) {
41
104k
  static_assert(LF_ROWS == ROWS,
42
104k
                "ReinterpretingDCT should only be called with LF == N");
43
104k
  static_assert(LF_COLS == COLS,
44
104k
                "ReinterpretingDCT should only be called with LF == N");
45
104k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
104k
                                 scratch_space);
47
104k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
104k
  } else {
56
940k
    for (size_t y = 0; y < LF_COLS; y++) {
57
7.52M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.68M
        output[y * output_stride + x] =
59
6.68M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.68M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.68M
      }
62
835k
    }
63
104k
  }
64
104k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
54
                                  float* JXL_RESTRICT scratch_space) {
41
54
  static_assert(LF_ROWS == ROWS,
42
54
                "ReinterpretingDCT should only be called with LF == N");
43
54
  static_assert(LF_COLS == COLS,
44
54
                "ReinterpretingDCT should only be called with LF == N");
45
54
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
54
                                 scratch_space);
47
54
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
54
  } else {
56
486
    for (size_t y = 0; y < LF_COLS; y++) {
57
7.34k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.91k
        output[y * output_stride + x] =
59
6.91k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.91k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.91k
      }
62
432
    }
63
54
  }
64
54
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
12
                                  float* JXL_RESTRICT scratch_space) {
41
12
  static_assert(LF_ROWS == ROWS,
42
12
                "ReinterpretingDCT should only be called with LF == N");
43
12
  static_assert(LF_COLS == COLS,
44
12
                "ReinterpretingDCT should only be called with LF == N");
45
12
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
12
                                 scratch_space);
47
12
  if (ROWS < COLS) {
48
108
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.63k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.53k
        output[y * output_stride + x] =
51
1.53k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.53k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.53k
      }
54
96
    }
55
12
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
12
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
21
                                  float* JXL_RESTRICT scratch_space) {
41
21
  static_assert(LF_ROWS == ROWS,
42
21
                "ReinterpretingDCT should only be called with LF == N");
43
21
  static_assert(LF_COLS == COLS,
44
21
                "ReinterpretingDCT should only be called with LF == N");
45
21
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
21
                                 scratch_space);
47
21
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
21
  } else {
56
357
    for (size_t y = 0; y < LF_COLS; y++) {
57
5.71k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
5.37k
        output[y * output_stride + x] =
59
5.37k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
5.37k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
5.37k
      }
62
336
    }
63
21
  }
64
21
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
27
                                  float* JXL_RESTRICT scratch_space) {
41
27
  static_assert(LF_ROWS == ROWS,
42
27
                "ReinterpretingDCT should only be called with LF == N");
43
27
  static_assert(LF_COLS == COLS,
44
27
                "ReinterpretingDCT should only be called with LF == N");
45
27
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
27
                                 scratch_space);
47
27
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
27
  } else {
56
891
    for (size_t y = 0; y < LF_COLS; y++) {
57
28.5k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
27.6k
        output[y * output_stride + x] =
59
27.6k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
27.6k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
27.6k
      }
62
864
    }
63
27
  }
64
27
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
55.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
55.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
55.0M
  static_assert(S % 2 == 0, "S should be even");
70
55.0M
  float temp[kDCTBlockSize];
71
55.0M
  constexpr size_t num_2x2 = S / 2;
72
183M
  for (size_t y = 0; y < num_2x2; y++) {
73
513M
    for (size_t x = 0; x < num_2x2; x++) {
74
385M
      float c00 = block[y * kBlockDim + x];
75
385M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
385M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
385M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
385M
      float r00 = c00 + c01 + c10 + c11;
79
385M
      float r01 = c00 + c01 - c10 - c11;
80
385M
      float r10 = c00 - c01 + c10 - c11;
81
385M
      float r11 = c00 - c01 - c10 + c11;
82
385M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
385M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
385M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
385M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
385M
    }
87
128M
  }
88
311M
  for (size_t y = 0; y < S; y++) {
89
1.79G
    for (size_t x = 0; x < S; x++) {
90
1.54G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.54G
    }
92
256M
  }
93
55.0M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.18M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.18M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.18M
  static_assert(S % 2 == 0, "S should be even");
70
3.18M
  float temp[kDCTBlockSize];
71
3.18M
  constexpr size_t num_2x2 = S / 2;
72
6.37M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.37M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.18M
      float c00 = block[y * kBlockDim + x];
75
3.18M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
3.18M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
3.18M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
3.18M
      float r00 = c00 + c01 + c10 + c11;
79
3.18M
      float r01 = c00 + c01 - c10 - c11;
80
3.18M
      float r10 = c00 - c01 + c10 - c11;
81
3.18M
      float r11 = c00 - c01 - c10 + c11;
82
3.18M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
3.18M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
3.18M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
3.18M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
3.18M
    }
87
3.18M
  }
88
9.56M
  for (size_t y = 0; y < S; y++) {
89
19.1M
    for (size_t x = 0; x < S; x++) {
90
12.7M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
12.7M
    }
92
6.37M
  }
93
3.18M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.18M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.18M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.18M
  static_assert(S % 2 == 0, "S should be even");
70
3.18M
  float temp[kDCTBlockSize];
71
3.18M
  constexpr size_t num_2x2 = S / 2;
72
9.56M
  for (size_t y = 0; y < num_2x2; y++) {
73
19.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
12.7M
      float c00 = block[y * kBlockDim + x];
75
12.7M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
12.7M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
12.7M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
12.7M
      float r00 = c00 + c01 + c10 + c11;
79
12.7M
      float r01 = c00 + c01 - c10 - c11;
80
12.7M
      float r10 = c00 - c01 + c10 - c11;
81
12.7M
      float r11 = c00 - c01 - c10 + c11;
82
12.7M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
12.7M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
12.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
12.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
12.7M
    }
87
6.37M
  }
88
15.9M
  for (size_t y = 0; y < S; y++) {
89
63.7M
    for (size_t x = 0; x < S; x++) {
90
51.0M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
51.0M
    }
92
12.7M
  }
93
3.18M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.18M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.18M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.18M
  static_assert(S % 2 == 0, "S should be even");
70
3.18M
  float temp[kDCTBlockSize];
71
3.18M
  constexpr size_t num_2x2 = S / 2;
72
15.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
63.7M
    for (size_t x = 0; x < num_2x2; x++) {
74
51.0M
      float c00 = block[y * kBlockDim + x];
75
51.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
51.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
51.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
51.0M
      float r00 = c00 + c01 + c10 + c11;
79
51.0M
      float r01 = c00 + c01 - c10 - c11;
80
51.0M
      float r10 = c00 - c01 + c10 - c11;
81
51.0M
      float r11 = c00 - c01 - c10 + c11;
82
51.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
51.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
51.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
51.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
51.0M
    }
87
12.7M
  }
88
28.7M
  for (size_t y = 0; y < S; y++) {
89
229M
    for (size_t x = 0; x < S; x++) {
90
204M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
204M
    }
92
25.5M
  }
93
3.18M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
30.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
30.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
15.1M
      float c00 = block[y * kBlockDim + x];
75
15.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
15.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
15.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
15.1M
      float r00 = c00 + c01 + c10 + c11;
79
15.1M
      float r01 = c00 + c01 - c10 - c11;
80
15.1M
      float r10 = c00 - c01 + c10 - c11;
81
15.1M
      float r11 = c00 - c01 - c10 + c11;
82
15.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
15.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
15.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
15.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
15.1M
    }
87
15.1M
  }
88
45.4M
  for (size_t y = 0; y < S; y++) {
89
90.9M
    for (size_t x = 0; x < S; x++) {
90
60.6M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
60.6M
    }
92
30.3M
  }
93
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
45.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
90.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
60.6M
      float c00 = block[y * kBlockDim + x];
75
60.6M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
60.6M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
60.6M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
60.6M
      float r00 = c00 + c01 + c10 + c11;
79
60.6M
      float r01 = c00 + c01 - c10 - c11;
80
60.6M
      float r10 = c00 - c01 + c10 - c11;
81
60.6M
      float r11 = c00 - c01 - c10 + c11;
82
60.6M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
60.6M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
60.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
60.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
60.6M
    }
87
30.3M
  }
88
75.8M
  for (size_t y = 0; y < S; y++) {
89
303M
    for (size_t x = 0; x < S; x++) {
90
242M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
242M
    }
92
60.6M
  }
93
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
75.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
303M
    for (size_t x = 0; x < num_2x2; x++) {
74
242M
      float c00 = block[y * kBlockDim + x];
75
242M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
242M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
242M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
242M
      float r00 = c00 + c01 + c10 + c11;
79
242M
      float r01 = c00 + c01 - c10 - c11;
80
242M
      float r10 = c00 - c01 + c10 - c11;
81
242M
      float r11 = c00 - c01 - c10 + c11;
82
242M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
242M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
242M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
242M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
242M
    }
87
60.6M
  }
88
136M
  for (size_t y = 0; y < S; y++) {
89
1.09G
    for (size_t x = 0; x < S; x++) {
90
970M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
970M
    }
92
121M
  }
93
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
64.2M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
64.2M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
64.2M
      {
98
64.2M
          0.25,
99
64.2M
          0.25,
100
64.2M
          0.25,
101
64.2M
          0.25,
102
64.2M
          0.25,
103
64.2M
          0.25,
104
64.2M
          0.25,
105
64.2M
          0.25,
106
64.2M
          0.25,
107
64.2M
          0.25,
108
64.2M
          0.25,
109
64.2M
          0.25,
110
64.2M
          0.25,
111
64.2M
          0.25,
112
64.2M
          0.25,
113
64.2M
          0.25,
114
64.2M
      },
115
64.2M
      {
116
64.2M
          0.876902929799142f,
117
64.2M
          0.2206518106944235f,
118
64.2M
          -0.10140050393753763f,
119
64.2M
          -0.1014005039375375f,
120
64.2M
          0.2206518106944236f,
121
64.2M
          -0.10140050393753777f,
122
64.2M
          -0.10140050393753772f,
123
64.2M
          -0.10140050393753763f,
124
64.2M
          -0.10140050393753758f,
125
64.2M
          -0.10140050393753769f,
126
64.2M
          -0.1014005039375375f,
127
64.2M
          -0.10140050393753768f,
128
64.2M
          -0.10140050393753768f,
129
64.2M
          -0.10140050393753759f,
130
64.2M
          -0.10140050393753763f,
131
64.2M
          -0.10140050393753741f,
132
64.2M
      },
133
64.2M
      {
134
64.2M
          0.0,
135
64.2M
          0.0,
136
64.2M
          0.40670075830260755f,
137
64.2M
          0.44444816619734445f,
138
64.2M
          0.0,
139
64.2M
          0.0,
140
64.2M
          0.19574399372042936f,
141
64.2M
          0.2929100136981264f,
142
64.2M
          -0.40670075830260716f,
143
64.2M
          -0.19574399372042872f,
144
64.2M
          0.0,
145
64.2M
          0.11379074460448091f,
146
64.2M
          -0.44444816619734384f,
147
64.2M
          -0.29291001369812636f,
148
64.2M
          -0.1137907446044814f,
149
64.2M
          0.0,
150
64.2M
      },
151
64.2M
      {
152
64.2M
          0.0,
153
64.2M
          0.0,
154
64.2M
          -0.21255748058288748f,
155
64.2M
          0.3085497062849767f,
156
64.2M
          0.0,
157
64.2M
          0.4706702258572536f,
158
64.2M
          -0.1621205195722993f,
159
64.2M
          0.0,
160
64.2M
          -0.21255748058287047f,
161
64.2M
          -0.16212051957228327f,
162
64.2M
          -0.47067022585725277f,
163
64.2M
          -0.1464291867126764f,
164
64.2M
          0.3085497062849487f,
165
64.2M
          0.0,
166
64.2M
          -0.14642918671266536f,
167
64.2M
          0.4251149611657548f,
168
64.2M
      },
169
64.2M
      {
170
64.2M
          0.0,
171
64.2M
          -0.7071067811865474f,
172
64.2M
          0.0,
173
64.2M
          0.0,
174
64.2M
          0.7071067811865476f,
175
64.2M
          0.0,
176
64.2M
          0.0,
177
64.2M
          0.0,
178
64.2M
          0.0,
179
64.2M
          0.0,
180
64.2M
          0.0,
181
64.2M
          0.0,
182
64.2M
          0.0,
183
64.2M
          0.0,
184
64.2M
          0.0,
185
64.2M
          0.0,
186
64.2M
      },
187
64.2M
      {
188
64.2M
          -0.4105377591765233f,
189
64.2M
          0.6235485373547691f,
190
64.2M
          -0.06435071657946274f,
191
64.2M
          -0.06435071657946266f,
192
64.2M
          0.6235485373547694f,
193
64.2M
          -0.06435071657946284f,
194
64.2M
          -0.0643507165794628f,
195
64.2M
          -0.06435071657946274f,
196
64.2M
          -0.06435071657946272f,
197
64.2M
          -0.06435071657946279f,
198
64.2M
          -0.06435071657946266f,
199
64.2M
          -0.06435071657946277f,
200
64.2M
          -0.06435071657946277f,
201
64.2M
          -0.06435071657946273f,
202
64.2M
          -0.06435071657946274f,
203
64.2M
          -0.0643507165794626f,
204
64.2M
      },
205
64.2M
      {
206
64.2M
          0.0,
207
64.2M
          0.0,
208
64.2M
          -0.4517556589999482f,
209
64.2M
          0.15854503551840063f,
210
64.2M
          0.0,
211
64.2M
          -0.04038515160822202f,
212
64.2M
          0.0074182263792423875f,
213
64.2M
          0.39351034269210167f,
214
64.2M
          -0.45175565899994635f,
215
64.2M
          0.007418226379244351f,
216
64.2M
          0.1107416575309343f,
217
64.2M
          0.08298163094882051f,
218
64.2M
          0.15854503551839705f,
219
64.2M
          0.3935103426921022f,
220
64.2M
          0.0829816309488214f,
221
64.2M
          -0.45175565899994796f,
222
64.2M
      },
223
64.2M
      {
224
64.2M
          0.0,
225
64.2M
          0.0,
226
64.2M
          -0.304684750724869f,
227
64.2M
          0.5112616136591823f,
228
64.2M
          0.0,
229
64.2M
          0.0,
230
64.2M
          -0.290480129728998f,
231
64.2M
          -0.06578701549142804f,
232
64.2M
          0.304684750724884f,
233
64.2M
          0.2904801297290076f,
234
64.2M
          0.0,
235
64.2M
          -0.23889773523344604f,
236
64.2M
          -0.5112616136592012f,
237
64.2M
          0.06578701549142545f,
238
64.2M
          0.23889773523345467f,
239
64.2M
          0.0,
240
64.2M
      },
241
64.2M
      {
242
64.2M
          0.0,
243
64.2M
          0.0,
244
64.2M
          0.3017929516615495f,
245
64.2M
          0.25792362796341184f,
246
64.2M
          0.0,
247
64.2M
          0.16272340142866204f,
248
64.2M
          0.09520022653475037f,
249
64.2M
          0.0,
250
64.2M
          0.3017929516615503f,
251
64.2M
          0.09520022653475055f,
252
64.2M
          -0.16272340142866173f,
253
64.2M
          -0.35312385449816297f,
254
64.2M
          0.25792362796341295f,
255
64.2M
          0.0,
256
64.2M
          -0.3531238544981624f,
257
64.2M
          -0.6035859033230976f,
258
64.2M
      },
259
64.2M
      {
260
64.2M
          0.0,
261
64.2M
          0.0,
262
64.2M
          0.40824829046386274f,
263
64.2M
          0.0,
264
64.2M
          0.0,
265
64.2M
          0.0,
266
64.2M
          0.0,
267
64.2M
          -0.4082482904638628f,
268
64.2M
          -0.4082482904638635f,
269
64.2M
          0.0,
270
64.2M
          0.0,
271
64.2M
          -0.40824829046386296f,
272
64.2M
          0.0,
273
64.2M
          0.4082482904638634f,
274
64.2M
          0.408248290463863f,
275
64.2M
          0.0,
276
64.2M
      },
277
64.2M
      {
278
64.2M
          0.0,
279
64.2M
          0.0,
280
64.2M
          0.1747866975480809f,
281
64.2M
          0.0812611176717539f,
282
64.2M
          0.0,
283
64.2M
          0.0,
284
64.2M
          -0.3675398009862027f,
285
64.2M
          -0.307882213957909f,
286
64.2M
          -0.17478669754808135f,
287
64.2M
          0.3675398009862011f,
288
64.2M
          0.0,
289
64.2M
          0.4826689115059883f,
290
64.2M
          -0.08126111767175039f,
291
64.2M
          0.30788221395790305f,
292
64.2M
          -0.48266891150598584f,
293
64.2M
          0.0,
294
64.2M
      },
295
64.2M
      {
296
64.2M
          0.0,
297
64.2M
          0.0,
298
64.2M
          -0.21105601049335784f,
299
64.2M
          0.18567180916109802f,
300
64.2M
          0.0,
301
64.2M
          0.0,
302
64.2M
          0.49215859013738733f,
303
64.2M
          -0.38525013709251915f,
304
64.2M
          0.21105601049335806f,
305
64.2M
          -0.49215859013738905f,
306
64.2M
          0.0,
307
64.2M
          0.17419412659916217f,
308
64.2M
          -0.18567180916109904f,
309
64.2M
          0.3852501370925211f,
310
64.2M
          -0.1741941265991621f,
311
64.2M
          0.0,
312
64.2M
      },
313
64.2M
      {
314
64.2M
          0.0,
315
64.2M
          0.0,
316
64.2M
          -0.14266084808807264f,
317
64.2M
          -0.3416446842253372f,
318
64.2M
          0.0,
319
64.2M
          0.7367497537172237f,
320
64.2M
          0.24627107722075148f,
321
64.2M
          -0.08574019035519306f,
322
64.2M
          -0.14266084808807344f,
323
64.2M
          0.24627107722075137f,
324
64.2M
          0.14883399227113567f,
325
64.2M
          -0.04768680350229251f,
326
64.2M
          -0.3416446842253373f,
327
64.2M
          -0.08574019035519267f,
328
64.2M
          -0.047686803502292804f,
329
64.2M
          -0.14266084808807242f,
330
64.2M
      },
331
64.2M
      {
332
64.2M
          0.0,
333
64.2M
          0.0,
334
64.2M
          -0.13813540350758585f,
335
64.2M
          0.3302282550303788f,
336
64.2M
          0.0,
337
64.2M
          0.08755115000587084f,
338
64.2M
          -0.07946706605909573f,
339
64.2M
          -0.4613374887461511f,
340
64.2M
          -0.13813540350758294f,
341
64.2M
          -0.07946706605910261f,
342
64.2M
          0.49724647109535086f,
343
64.2M
          0.12538059448563663f,
344
64.2M
          0.3302282550303805f,
345
64.2M
          -0.4613374887461554f,
346
64.2M
          0.12538059448564315f,
347
64.2M
          -0.13813540350758452f,
348
64.2M
      },
349
64.2M
      {
350
64.2M
          0.0,
351
64.2M
          0.0,
352
64.2M
          -0.17437602599651067f,
353
64.2M
          0.0702790691196284f,
354
64.2M
          0.0,
355
64.2M
          -0.2921026642334881f,
356
64.2M
          0.3623817333531167f,
357
64.2M
          0.0,
358
64.2M
          -0.1743760259965108f,
359
64.2M
          0.36238173335311646f,
360
64.2M
          0.29210266423348785f,
361
64.2M
          -0.4326608024727445f,
362
64.2M
          0.07027906911962818f,
363
64.2M
          0.0,
364
64.2M
          -0.4326608024727457f,
365
64.2M
          0.34875205199302267f,
366
64.2M
      },
367
64.2M
      {
368
64.2M
          0.0,
369
64.2M
          0.0,
370
64.2M
          0.11354987314994337f,
371
64.2M
          -0.07417504595810355f,
372
64.2M
          0.0,
373
64.2M
          0.19402893032594343f,
374
64.2M
          -0.435190496523228f,
375
64.2M
          0.21918684838857466f,
376
64.2M
          0.11354987314994257f,
377
64.2M
          -0.4351904965232251f,
378
64.2M
          0.5550443808910661f,
379
64.2M
          -0.25468277124066463f,
380
64.2M
          -0.07417504595810233f,
381
64.2M
          0.2191868483885728f,
382
64.2M
          -0.25468277124066413f,
383
64.2M
          0.1135498731499429f,
384
64.2M
      },
385
64.2M
  };
386
387
64.2M
  const HWY_CAPPED(float, 16) d;
388
192M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
128M
    auto pixel = Zero(d);
390
2.18G
    for (size_t j = 0; j < 16; j++) {
391
2.05G
      auto cf = Set(d, coeffs[j]);
392
2.05G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.05G
      pixel = MulAdd(cf, basis, pixel);
394
2.05G
    }
395
128M
    Store(pixel, d, pixels + i);
396
128M
  }
397
64.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
3.57M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
3.57M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
3.57M
      {
98
3.57M
          0.25,
99
3.57M
          0.25,
100
3.57M
          0.25,
101
3.57M
          0.25,
102
3.57M
          0.25,
103
3.57M
          0.25,
104
3.57M
          0.25,
105
3.57M
          0.25,
106
3.57M
          0.25,
107
3.57M
          0.25,
108
3.57M
          0.25,
109
3.57M
          0.25,
110
3.57M
          0.25,
111
3.57M
          0.25,
112
3.57M
          0.25,
113
3.57M
          0.25,
114
3.57M
      },
115
3.57M
      {
116
3.57M
          0.876902929799142f,
117
3.57M
          0.2206518106944235f,
118
3.57M
          -0.10140050393753763f,
119
3.57M
          -0.1014005039375375f,
120
3.57M
          0.2206518106944236f,
121
3.57M
          -0.10140050393753777f,
122
3.57M
          -0.10140050393753772f,
123
3.57M
          -0.10140050393753763f,
124
3.57M
          -0.10140050393753758f,
125
3.57M
          -0.10140050393753769f,
126
3.57M
          -0.1014005039375375f,
127
3.57M
          -0.10140050393753768f,
128
3.57M
          -0.10140050393753768f,
129
3.57M
          -0.10140050393753759f,
130
3.57M
          -0.10140050393753763f,
131
3.57M
          -0.10140050393753741f,
132
3.57M
      },
133
3.57M
      {
134
3.57M
          0.0,
135
3.57M
          0.0,
136
3.57M
          0.40670075830260755f,
137
3.57M
          0.44444816619734445f,
138
3.57M
          0.0,
139
3.57M
          0.0,
140
3.57M
          0.19574399372042936f,
141
3.57M
          0.2929100136981264f,
142
3.57M
          -0.40670075830260716f,
143
3.57M
          -0.19574399372042872f,
144
3.57M
          0.0,
145
3.57M
          0.11379074460448091f,
146
3.57M
          -0.44444816619734384f,
147
3.57M
          -0.29291001369812636f,
148
3.57M
          -0.1137907446044814f,
149
3.57M
          0.0,
150
3.57M
      },
151
3.57M
      {
152
3.57M
          0.0,
153
3.57M
          0.0,
154
3.57M
          -0.21255748058288748f,
155
3.57M
          0.3085497062849767f,
156
3.57M
          0.0,
157
3.57M
          0.4706702258572536f,
158
3.57M
          -0.1621205195722993f,
159
3.57M
          0.0,
160
3.57M
          -0.21255748058287047f,
161
3.57M
          -0.16212051957228327f,
162
3.57M
          -0.47067022585725277f,
163
3.57M
          -0.1464291867126764f,
164
3.57M
          0.3085497062849487f,
165
3.57M
          0.0,
166
3.57M
          -0.14642918671266536f,
167
3.57M
          0.4251149611657548f,
168
3.57M
      },
169
3.57M
      {
170
3.57M
          0.0,
171
3.57M
          -0.7071067811865474f,
172
3.57M
          0.0,
173
3.57M
          0.0,
174
3.57M
          0.7071067811865476f,
175
3.57M
          0.0,
176
3.57M
          0.0,
177
3.57M
          0.0,
178
3.57M
          0.0,
179
3.57M
          0.0,
180
3.57M
          0.0,
181
3.57M
          0.0,
182
3.57M
          0.0,
183
3.57M
          0.0,
184
3.57M
          0.0,
185
3.57M
          0.0,
186
3.57M
      },
187
3.57M
      {
188
3.57M
          -0.4105377591765233f,
189
3.57M
          0.6235485373547691f,
190
3.57M
          -0.06435071657946274f,
191
3.57M
          -0.06435071657946266f,
192
3.57M
          0.6235485373547694f,
193
3.57M
          -0.06435071657946284f,
194
3.57M
          -0.0643507165794628f,
195
3.57M
          -0.06435071657946274f,
196
3.57M
          -0.06435071657946272f,
197
3.57M
          -0.06435071657946279f,
198
3.57M
          -0.06435071657946266f,
199
3.57M
          -0.06435071657946277f,
200
3.57M
          -0.06435071657946277f,
201
3.57M
          -0.06435071657946273f,
202
3.57M
          -0.06435071657946274f,
203
3.57M
          -0.0643507165794626f,
204
3.57M
      },
205
3.57M
      {
206
3.57M
          0.0,
207
3.57M
          0.0,
208
3.57M
          -0.4517556589999482f,
209
3.57M
          0.15854503551840063f,
210
3.57M
          0.0,
211
3.57M
          -0.04038515160822202f,
212
3.57M
          0.0074182263792423875f,
213
3.57M
          0.39351034269210167f,
214
3.57M
          -0.45175565899994635f,
215
3.57M
          0.007418226379244351f,
216
3.57M
          0.1107416575309343f,
217
3.57M
          0.08298163094882051f,
218
3.57M
          0.15854503551839705f,
219
3.57M
          0.3935103426921022f,
220
3.57M
          0.0829816309488214f,
221
3.57M
          -0.45175565899994796f,
222
3.57M
      },
223
3.57M
      {
224
3.57M
          0.0,
225
3.57M
          0.0,
226
3.57M
          -0.304684750724869f,
227
3.57M
          0.5112616136591823f,
228
3.57M
          0.0,
229
3.57M
          0.0,
230
3.57M
          -0.290480129728998f,
231
3.57M
          -0.06578701549142804f,
232
3.57M
          0.304684750724884f,
233
3.57M
          0.2904801297290076f,
234
3.57M
          0.0,
235
3.57M
          -0.23889773523344604f,
236
3.57M
          -0.5112616136592012f,
237
3.57M
          0.06578701549142545f,
238
3.57M
          0.23889773523345467f,
239
3.57M
          0.0,
240
3.57M
      },
241
3.57M
      {
242
3.57M
          0.0,
243
3.57M
          0.0,
244
3.57M
          0.3017929516615495f,
245
3.57M
          0.25792362796341184f,
246
3.57M
          0.0,
247
3.57M
          0.16272340142866204f,
248
3.57M
          0.09520022653475037f,
249
3.57M
          0.0,
250
3.57M
          0.3017929516615503f,
251
3.57M
          0.09520022653475055f,
252
3.57M
          -0.16272340142866173f,
253
3.57M
          -0.35312385449816297f,
254
3.57M
          0.25792362796341295f,
255
3.57M
          0.0,
256
3.57M
          -0.3531238544981624f,
257
3.57M
          -0.6035859033230976f,
258
3.57M
      },
259
3.57M
      {
260
3.57M
          0.0,
261
3.57M
          0.0,
262
3.57M
          0.40824829046386274f,
263
3.57M
          0.0,
264
3.57M
          0.0,
265
3.57M
          0.0,
266
3.57M
          0.0,
267
3.57M
          -0.4082482904638628f,
268
3.57M
          -0.4082482904638635f,
269
3.57M
          0.0,
270
3.57M
          0.0,
271
3.57M
          -0.40824829046386296f,
272
3.57M
          0.0,
273
3.57M
          0.4082482904638634f,
274
3.57M
          0.408248290463863f,
275
3.57M
          0.0,
276
3.57M
      },
277
3.57M
      {
278
3.57M
          0.0,
279
3.57M
          0.0,
280
3.57M
          0.1747866975480809f,
281
3.57M
          0.0812611176717539f,
282
3.57M
          0.0,
283
3.57M
          0.0,
284
3.57M
          -0.3675398009862027f,
285
3.57M
          -0.307882213957909f,
286
3.57M
          -0.17478669754808135f,
287
3.57M
          0.3675398009862011f,
288
3.57M
          0.0,
289
3.57M
          0.4826689115059883f,
290
3.57M
          -0.08126111767175039f,
291
3.57M
          0.30788221395790305f,
292
3.57M
          -0.48266891150598584f,
293
3.57M
          0.0,
294
3.57M
      },
295
3.57M
      {
296
3.57M
          0.0,
297
3.57M
          0.0,
298
3.57M
          -0.21105601049335784f,
299
3.57M
          0.18567180916109802f,
300
3.57M
          0.0,
301
3.57M
          0.0,
302
3.57M
          0.49215859013738733f,
303
3.57M
          -0.38525013709251915f,
304
3.57M
          0.21105601049335806f,
305
3.57M
          -0.49215859013738905f,
306
3.57M
          0.0,
307
3.57M
          0.17419412659916217f,
308
3.57M
          -0.18567180916109904f,
309
3.57M
          0.3852501370925211f,
310
3.57M
          -0.1741941265991621f,
311
3.57M
          0.0,
312
3.57M
      },
313
3.57M
      {
314
3.57M
          0.0,
315
3.57M
          0.0,
316
3.57M
          -0.14266084808807264f,
317
3.57M
          -0.3416446842253372f,
318
3.57M
          0.0,
319
3.57M
          0.7367497537172237f,
320
3.57M
          0.24627107722075148f,
321
3.57M
          -0.08574019035519306f,
322
3.57M
          -0.14266084808807344f,
323
3.57M
          0.24627107722075137f,
324
3.57M
          0.14883399227113567f,
325
3.57M
          -0.04768680350229251f,
326
3.57M
          -0.3416446842253373f,
327
3.57M
          -0.08574019035519267f,
328
3.57M
          -0.047686803502292804f,
329
3.57M
          -0.14266084808807242f,
330
3.57M
      },
331
3.57M
      {
332
3.57M
          0.0,
333
3.57M
          0.0,
334
3.57M
          -0.13813540350758585f,
335
3.57M
          0.3302282550303788f,
336
3.57M
          0.0,
337
3.57M
          0.08755115000587084f,
338
3.57M
          -0.07946706605909573f,
339
3.57M
          -0.4613374887461511f,
340
3.57M
          -0.13813540350758294f,
341
3.57M
          -0.07946706605910261f,
342
3.57M
          0.49724647109535086f,
343
3.57M
          0.12538059448563663f,
344
3.57M
          0.3302282550303805f,
345
3.57M
          -0.4613374887461554f,
346
3.57M
          0.12538059448564315f,
347
3.57M
          -0.13813540350758452f,
348
3.57M
      },
349
3.57M
      {
350
3.57M
          0.0,
351
3.57M
          0.0,
352
3.57M
          -0.17437602599651067f,
353
3.57M
          0.0702790691196284f,
354
3.57M
          0.0,
355
3.57M
          -0.2921026642334881f,
356
3.57M
          0.3623817333531167f,
357
3.57M
          0.0,
358
3.57M
          -0.1743760259965108f,
359
3.57M
          0.36238173335311646f,
360
3.57M
          0.29210266423348785f,
361
3.57M
          -0.4326608024727445f,
362
3.57M
          0.07027906911962818f,
363
3.57M
          0.0,
364
3.57M
          -0.4326608024727457f,
365
3.57M
          0.34875205199302267f,
366
3.57M
      },
367
3.57M
      {
368
3.57M
          0.0,
369
3.57M
          0.0,
370
3.57M
          0.11354987314994337f,
371
3.57M
          -0.07417504595810355f,
372
3.57M
          0.0,
373
3.57M
          0.19402893032594343f,
374
3.57M
          -0.435190496523228f,
375
3.57M
          0.21918684838857466f,
376
3.57M
          0.11354987314994257f,
377
3.57M
          -0.4351904965232251f,
378
3.57M
          0.5550443808910661f,
379
3.57M
          -0.25468277124066463f,
380
3.57M
          -0.07417504595810233f,
381
3.57M
          0.2191868483885728f,
382
3.57M
          -0.25468277124066413f,
383
3.57M
          0.1135498731499429f,
384
3.57M
      },
385
3.57M
  };
386
387
3.57M
  const HWY_CAPPED(float, 16) d;
388
10.7M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
7.15M
    auto pixel = Zero(d);
390
121M
    for (size_t j = 0; j < 16; j++) {
391
114M
      auto cf = Set(d, coeffs[j]);
392
114M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
114M
      pixel = MulAdd(cf, basis, pixel);
394
114M
    }
395
7.15M
    Store(pixel, d, pixels + i);
396
7.15M
  }
397
3.57M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
60.6M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
60.6M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
60.6M
      {
98
60.6M
          0.25,
99
60.6M
          0.25,
100
60.6M
          0.25,
101
60.6M
          0.25,
102
60.6M
          0.25,
103
60.6M
          0.25,
104
60.6M
          0.25,
105
60.6M
          0.25,
106
60.6M
          0.25,
107
60.6M
          0.25,
108
60.6M
          0.25,
109
60.6M
          0.25,
110
60.6M
          0.25,
111
60.6M
          0.25,
112
60.6M
          0.25,
113
60.6M
          0.25,
114
60.6M
      },
115
60.6M
      {
116
60.6M
          0.876902929799142f,
117
60.6M
          0.2206518106944235f,
118
60.6M
          -0.10140050393753763f,
119
60.6M
          -0.1014005039375375f,
120
60.6M
          0.2206518106944236f,
121
60.6M
          -0.10140050393753777f,
122
60.6M
          -0.10140050393753772f,
123
60.6M
          -0.10140050393753763f,
124
60.6M
          -0.10140050393753758f,
125
60.6M
          -0.10140050393753769f,
126
60.6M
          -0.1014005039375375f,
127
60.6M
          -0.10140050393753768f,
128
60.6M
          -0.10140050393753768f,
129
60.6M
          -0.10140050393753759f,
130
60.6M
          -0.10140050393753763f,
131
60.6M
          -0.10140050393753741f,
132
60.6M
      },
133
60.6M
      {
134
60.6M
          0.0,
135
60.6M
          0.0,
136
60.6M
          0.40670075830260755f,
137
60.6M
          0.44444816619734445f,
138
60.6M
          0.0,
139
60.6M
          0.0,
140
60.6M
          0.19574399372042936f,
141
60.6M
          0.2929100136981264f,
142
60.6M
          -0.40670075830260716f,
143
60.6M
          -0.19574399372042872f,
144
60.6M
          0.0,
145
60.6M
          0.11379074460448091f,
146
60.6M
          -0.44444816619734384f,
147
60.6M
          -0.29291001369812636f,
148
60.6M
          -0.1137907446044814f,
149
60.6M
          0.0,
150
60.6M
      },
151
60.6M
      {
152
60.6M
          0.0,
153
60.6M
          0.0,
154
60.6M
          -0.21255748058288748f,
155
60.6M
          0.3085497062849767f,
156
60.6M
          0.0,
157
60.6M
          0.4706702258572536f,
158
60.6M
          -0.1621205195722993f,
159
60.6M
          0.0,
160
60.6M
          -0.21255748058287047f,
161
60.6M
          -0.16212051957228327f,
162
60.6M
          -0.47067022585725277f,
163
60.6M
          -0.1464291867126764f,
164
60.6M
          0.3085497062849487f,
165
60.6M
          0.0,
166
60.6M
          -0.14642918671266536f,
167
60.6M
          0.4251149611657548f,
168
60.6M
      },
169
60.6M
      {
170
60.6M
          0.0,
171
60.6M
          -0.7071067811865474f,
172
60.6M
          0.0,
173
60.6M
          0.0,
174
60.6M
          0.7071067811865476f,
175
60.6M
          0.0,
176
60.6M
          0.0,
177
60.6M
          0.0,
178
60.6M
          0.0,
179
60.6M
          0.0,
180
60.6M
          0.0,
181
60.6M
          0.0,
182
60.6M
          0.0,
183
60.6M
          0.0,
184
60.6M
          0.0,
185
60.6M
          0.0,
186
60.6M
      },
187
60.6M
      {
188
60.6M
          -0.4105377591765233f,
189
60.6M
          0.6235485373547691f,
190
60.6M
          -0.06435071657946274f,
191
60.6M
          -0.06435071657946266f,
192
60.6M
          0.6235485373547694f,
193
60.6M
          -0.06435071657946284f,
194
60.6M
          -0.0643507165794628f,
195
60.6M
          -0.06435071657946274f,
196
60.6M
          -0.06435071657946272f,
197
60.6M
          -0.06435071657946279f,
198
60.6M
          -0.06435071657946266f,
199
60.6M
          -0.06435071657946277f,
200
60.6M
          -0.06435071657946277f,
201
60.6M
          -0.06435071657946273f,
202
60.6M
          -0.06435071657946274f,
203
60.6M
          -0.0643507165794626f,
204
60.6M
      },
205
60.6M
      {
206
60.6M
          0.0,
207
60.6M
          0.0,
208
60.6M
          -0.4517556589999482f,
209
60.6M
          0.15854503551840063f,
210
60.6M
          0.0,
211
60.6M
          -0.04038515160822202f,
212
60.6M
          0.0074182263792423875f,
213
60.6M
          0.39351034269210167f,
214
60.6M
          -0.45175565899994635f,
215
60.6M
          0.007418226379244351f,
216
60.6M
          0.1107416575309343f,
217
60.6M
          0.08298163094882051f,
218
60.6M
          0.15854503551839705f,
219
60.6M
          0.3935103426921022f,
220
60.6M
          0.0829816309488214f,
221
60.6M
          -0.45175565899994796f,
222
60.6M
      },
223
60.6M
      {
224
60.6M
          0.0,
225
60.6M
          0.0,
226
60.6M
          -0.304684750724869f,
227
60.6M
          0.5112616136591823f,
228
60.6M
          0.0,
229
60.6M
          0.0,
230
60.6M
          -0.290480129728998f,
231
60.6M
          -0.06578701549142804f,
232
60.6M
          0.304684750724884f,
233
60.6M
          0.2904801297290076f,
234
60.6M
          0.0,
235
60.6M
          -0.23889773523344604f,
236
60.6M
          -0.5112616136592012f,
237
60.6M
          0.06578701549142545f,
238
60.6M
          0.23889773523345467f,
239
60.6M
          0.0,
240
60.6M
      },
241
60.6M
      {
242
60.6M
          0.0,
243
60.6M
          0.0,
244
60.6M
          0.3017929516615495f,
245
60.6M
          0.25792362796341184f,
246
60.6M
          0.0,
247
60.6M
          0.16272340142866204f,
248
60.6M
          0.09520022653475037f,
249
60.6M
          0.0,
250
60.6M
          0.3017929516615503f,
251
60.6M
          0.09520022653475055f,
252
60.6M
          -0.16272340142866173f,
253
60.6M
          -0.35312385449816297f,
254
60.6M
          0.25792362796341295f,
255
60.6M
          0.0,
256
60.6M
          -0.3531238544981624f,
257
60.6M
          -0.6035859033230976f,
258
60.6M
      },
259
60.6M
      {
260
60.6M
          0.0,
261
60.6M
          0.0,
262
60.6M
          0.40824829046386274f,
263
60.6M
          0.0,
264
60.6M
          0.0,
265
60.6M
          0.0,
266
60.6M
          0.0,
267
60.6M
          -0.4082482904638628f,
268
60.6M
          -0.4082482904638635f,
269
60.6M
          0.0,
270
60.6M
          0.0,
271
60.6M
          -0.40824829046386296f,
272
60.6M
          0.0,
273
60.6M
          0.4082482904638634f,
274
60.6M
          0.408248290463863f,
275
60.6M
          0.0,
276
60.6M
      },
277
60.6M
      {
278
60.6M
          0.0,
279
60.6M
          0.0,
280
60.6M
          0.1747866975480809f,
281
60.6M
          0.0812611176717539f,
282
60.6M
          0.0,
283
60.6M
          0.0,
284
60.6M
          -0.3675398009862027f,
285
60.6M
          -0.307882213957909f,
286
60.6M
          -0.17478669754808135f,
287
60.6M
          0.3675398009862011f,
288
60.6M
          0.0,
289
60.6M
          0.4826689115059883f,
290
60.6M
          -0.08126111767175039f,
291
60.6M
          0.30788221395790305f,
292
60.6M
          -0.48266891150598584f,
293
60.6M
          0.0,
294
60.6M
      },
295
60.6M
      {
296
60.6M
          0.0,
297
60.6M
          0.0,
298
60.6M
          -0.21105601049335784f,
299
60.6M
          0.18567180916109802f,
300
60.6M
          0.0,
301
60.6M
          0.0,
302
60.6M
          0.49215859013738733f,
303
60.6M
          -0.38525013709251915f,
304
60.6M
          0.21105601049335806f,
305
60.6M
          -0.49215859013738905f,
306
60.6M
          0.0,
307
60.6M
          0.17419412659916217f,
308
60.6M
          -0.18567180916109904f,
309
60.6M
          0.3852501370925211f,
310
60.6M
          -0.1741941265991621f,
311
60.6M
          0.0,
312
60.6M
      },
313
60.6M
      {
314
60.6M
          0.0,
315
60.6M
          0.0,
316
60.6M
          -0.14266084808807264f,
317
60.6M
          -0.3416446842253372f,
318
60.6M
          0.0,
319
60.6M
          0.7367497537172237f,
320
60.6M
          0.24627107722075148f,
321
60.6M
          -0.08574019035519306f,
322
60.6M
          -0.14266084808807344f,
323
60.6M
          0.24627107722075137f,
324
60.6M
          0.14883399227113567f,
325
60.6M
          -0.04768680350229251f,
326
60.6M
          -0.3416446842253373f,
327
60.6M
          -0.08574019035519267f,
328
60.6M
          -0.047686803502292804f,
329
60.6M
          -0.14266084808807242f,
330
60.6M
      },
331
60.6M
      {
332
60.6M
          0.0,
333
60.6M
          0.0,
334
60.6M
          -0.13813540350758585f,
335
60.6M
          0.3302282550303788f,
336
60.6M
          0.0,
337
60.6M
          0.08755115000587084f,
338
60.6M
          -0.07946706605909573f,
339
60.6M
          -0.4613374887461511f,
340
60.6M
          -0.13813540350758294f,
341
60.6M
          -0.07946706605910261f,
342
60.6M
          0.49724647109535086f,
343
60.6M
          0.12538059448563663f,
344
60.6M
          0.3302282550303805f,
345
60.6M
          -0.4613374887461554f,
346
60.6M
          0.12538059448564315f,
347
60.6M
          -0.13813540350758452f,
348
60.6M
      },
349
60.6M
      {
350
60.6M
          0.0,
351
60.6M
          0.0,
352
60.6M
          -0.17437602599651067f,
353
60.6M
          0.0702790691196284f,
354
60.6M
          0.0,
355
60.6M
          -0.2921026642334881f,
356
60.6M
          0.3623817333531167f,
357
60.6M
          0.0,
358
60.6M
          -0.1743760259965108f,
359
60.6M
          0.36238173335311646f,
360
60.6M
          0.29210266423348785f,
361
60.6M
          -0.4326608024727445f,
362
60.6M
          0.07027906911962818f,
363
60.6M
          0.0,
364
60.6M
          -0.4326608024727457f,
365
60.6M
          0.34875205199302267f,
366
60.6M
      },
367
60.6M
      {
368
60.6M
          0.0,
369
60.6M
          0.0,
370
60.6M
          0.11354987314994337f,
371
60.6M
          -0.07417504595810355f,
372
60.6M
          0.0,
373
60.6M
          0.19402893032594343f,
374
60.6M
          -0.435190496523228f,
375
60.6M
          0.21918684838857466f,
376
60.6M
          0.11354987314994257f,
377
60.6M
          -0.4351904965232251f,
378
60.6M
          0.5550443808910661f,
379
60.6M
          -0.25468277124066463f,
380
60.6M
          -0.07417504595810233f,
381
60.6M
          0.2191868483885728f,
382
60.6M
          -0.25468277124066413f,
383
60.6M
          0.1135498731499429f,
384
60.6M
      },
385
60.6M
  };
386
387
60.6M
  const HWY_CAPPED(float, 16) d;
388
181M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
121M
    auto pixel = Zero(d);
390
2.06G
    for (size_t j = 0; j < 16; j++) {
391
1.94G
      auto cf = Set(d, coeffs[j]);
392
1.94G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.94G
      pixel = MulAdd(cf, basis, pixel);
394
1.94G
    }
395
121M
    Store(pixel, d, pixels + i);
396
121M
  }
397
60.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
64.2M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
64.2M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
64.2M
  size_t afv_x = afv_kind & 1;
404
64.2M
  size_t afv_y = afv_kind / 2;
405
64.2M
  float dcs[3] = {};
406
64.2M
  float block00 = coefficients[0];
407
64.2M
  float block01 = coefficients[1];
408
64.2M
  float block10 = coefficients[8];
409
64.2M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
64.2M
  dcs[1] = (block00 + block10 - block01);
411
64.2M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
64.2M
  HWY_ALIGN float coeff[4 * 4];
414
64.2M
  coeff[0] = dcs[0];
415
321M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.28G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.02G
      if (ix == 0 && iy == 0) continue;
418
963M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
963M
    }
420
256M
  }
421
64.2M
  HWY_ALIGN float block[4 * 8];
422
64.2M
  AFVIDCT4x4(coeff, block);
423
321M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.28G
    for (size_t ix = 0; ix < 4; ix++) {
425
1.02G
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
1.02G
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
1.02G
    }
428
256M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
64.2M
  block[0] = dcs[1];
431
321M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.28G
    for (size_t ix = 0; ix < 4; ix++) {
433
1.02G
      if (ix == 0 && iy == 0) continue;
434
963M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
963M
    }
436
256M
  }
437
64.2M
  ComputeScaledIDCT<4, 4>()(
438
64.2M
      block,
439
64.2M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
64.2M
            pixels_stride),
441
64.2M
      scratch_space);
442
  // IDCT4x8.
443
64.2M
  block[0] = dcs[2];
444
321M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.31G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.05G
      if (ix == 0 && iy == 0) continue;
447
1.99G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.99G
    }
449
256M
  }
450
64.2M
  ComputeScaledIDCT<4, 8>()(
451
64.2M
      block,
452
64.2M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
64.2M
      scratch_space);
454
64.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
775k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
775k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
775k
  size_t afv_x = afv_kind & 1;
404
775k
  size_t afv_y = afv_kind / 2;
405
775k
  float dcs[3] = {};
406
775k
  float block00 = coefficients[0];
407
775k
  float block01 = coefficients[1];
408
775k
  float block10 = coefficients[8];
409
775k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
775k
  dcs[1] = (block00 + block10 - block01);
411
775k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
775k
  HWY_ALIGN float coeff[4 * 4];
414
775k
  coeff[0] = dcs[0];
415
3.87M
  for (size_t iy = 0; iy < 4; iy++) {
416
15.5M
    for (size_t ix = 0; ix < 4; ix++) {
417
12.4M
      if (ix == 0 && iy == 0) continue;
418
11.6M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
11.6M
    }
420
3.10M
  }
421
775k
  HWY_ALIGN float block[4 * 8];
422
775k
  AFVIDCT4x4(coeff, block);
423
3.87M
  for (size_t iy = 0; iy < 4; iy++) {
424
15.5M
    for (size_t ix = 0; ix < 4; ix++) {
425
12.4M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
12.4M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
12.4M
    }
428
3.10M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
775k
  block[0] = dcs[1];
431
3.87M
  for (size_t iy = 0; iy < 4; iy++) {
432
15.5M
    for (size_t ix = 0; ix < 4; ix++) {
433
12.4M
      if (ix == 0 && iy == 0) continue;
434
11.6M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
11.6M
    }
436
3.10M
  }
437
775k
  ComputeScaledIDCT<4, 4>()(
438
775k
      block,
439
775k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
775k
            pixels_stride),
441
775k
      scratch_space);
442
  // IDCT4x8.
443
775k
  block[0] = dcs[2];
444
3.87M
  for (size_t iy = 0; iy < 4; iy++) {
445
27.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
24.8M
      if (ix == 0 && iy == 0) continue;
447
24.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
24.0M
    }
449
3.10M
  }
450
775k
  ComputeScaledIDCT<4, 8>()(
451
775k
      block,
452
775k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
775k
      scratch_space);
454
775k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.26M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.26M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.26M
  size_t afv_x = afv_kind & 1;
404
1.26M
  size_t afv_y = afv_kind / 2;
405
1.26M
  float dcs[3] = {};
406
1.26M
  float block00 = coefficients[0];
407
1.26M
  float block01 = coefficients[1];
408
1.26M
  float block10 = coefficients[8];
409
1.26M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.26M
  dcs[1] = (block00 + block10 - block01);
411
1.26M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.26M
  HWY_ALIGN float coeff[4 * 4];
414
1.26M
  coeff[0] = dcs[0];
415
6.30M
  for (size_t iy = 0; iy < 4; iy++) {
416
25.2M
    for (size_t ix = 0; ix < 4; ix++) {
417
20.1M
      if (ix == 0 && iy == 0) continue;
418
18.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
18.9M
    }
420
5.04M
  }
421
1.26M
  HWY_ALIGN float block[4 * 8];
422
1.26M
  AFVIDCT4x4(coeff, block);
423
6.30M
  for (size_t iy = 0; iy < 4; iy++) {
424
25.2M
    for (size_t ix = 0; ix < 4; ix++) {
425
20.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
20.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
20.1M
    }
428
5.04M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.26M
  block[0] = dcs[1];
431
6.30M
  for (size_t iy = 0; iy < 4; iy++) {
432
25.2M
    for (size_t ix = 0; ix < 4; ix++) {
433
20.1M
      if (ix == 0 && iy == 0) continue;
434
18.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
18.9M
    }
436
5.04M
  }
437
1.26M
  ComputeScaledIDCT<4, 4>()(
438
1.26M
      block,
439
1.26M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.26M
            pixels_stride),
441
1.26M
      scratch_space);
442
  // IDCT4x8.
443
1.26M
  block[0] = dcs[2];
444
6.30M
  for (size_t iy = 0; iy < 4; iy++) {
445
45.3M
    for (size_t ix = 0; ix < 8; ix++) {
446
40.3M
      if (ix == 0 && iy == 0) continue;
447
39.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
39.0M
    }
449
5.04M
  }
450
1.26M
  ComputeScaledIDCT<4, 8>()(
451
1.26M
      block,
452
1.26M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.26M
      scratch_space);
454
1.26M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
693k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
693k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
693k
  size_t afv_x = afv_kind & 1;
404
693k
  size_t afv_y = afv_kind / 2;
405
693k
  float dcs[3] = {};
406
693k
  float block00 = coefficients[0];
407
693k
  float block01 = coefficients[1];
408
693k
  float block10 = coefficients[8];
409
693k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
693k
  dcs[1] = (block00 + block10 - block01);
411
693k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
693k
  HWY_ALIGN float coeff[4 * 4];
414
693k
  coeff[0] = dcs[0];
415
3.46M
  for (size_t iy = 0; iy < 4; iy++) {
416
13.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
11.0M
      if (ix == 0 && iy == 0) continue;
418
10.3M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
10.3M
    }
420
2.77M
  }
421
693k
  HWY_ALIGN float block[4 * 8];
422
693k
  AFVIDCT4x4(coeff, block);
423
3.46M
  for (size_t iy = 0; iy < 4; iy++) {
424
13.8M
    for (size_t ix = 0; ix < 4; ix++) {
425
11.0M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
11.0M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
11.0M
    }
428
2.77M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
693k
  block[0] = dcs[1];
431
3.46M
  for (size_t iy = 0; iy < 4; iy++) {
432
13.8M
    for (size_t ix = 0; ix < 4; ix++) {
433
11.0M
      if (ix == 0 && iy == 0) continue;
434
10.3M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
10.3M
    }
436
2.77M
  }
437
693k
  ComputeScaledIDCT<4, 4>()(
438
693k
      block,
439
693k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
693k
            pixels_stride),
441
693k
      scratch_space);
442
  // IDCT4x8.
443
693k
  block[0] = dcs[2];
444
3.46M
  for (size_t iy = 0; iy < 4; iy++) {
445
24.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
22.1M
      if (ix == 0 && iy == 0) continue;
447
21.4M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
21.4M
    }
449
2.77M
  }
450
693k
  ComputeScaledIDCT<4, 8>()(
451
693k
      block,
452
693k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
693k
      scratch_space);
454
693k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
847k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
847k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
847k
  size_t afv_x = afv_kind & 1;
404
847k
  size_t afv_y = afv_kind / 2;
405
847k
  float dcs[3] = {};
406
847k
  float block00 = coefficients[0];
407
847k
  float block01 = coefficients[1];
408
847k
  float block10 = coefficients[8];
409
847k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
847k
  dcs[1] = (block00 + block10 - block01);
411
847k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
847k
  HWY_ALIGN float coeff[4 * 4];
414
847k
  coeff[0] = dcs[0];
415
4.23M
  for (size_t iy = 0; iy < 4; iy++) {
416
16.9M
    for (size_t ix = 0; ix < 4; ix++) {
417
13.5M
      if (ix == 0 && iy == 0) continue;
418
12.7M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
12.7M
    }
420
3.38M
  }
421
847k
  HWY_ALIGN float block[4 * 8];
422
847k
  AFVIDCT4x4(coeff, block);
423
4.23M
  for (size_t iy = 0; iy < 4; iy++) {
424
16.9M
    for (size_t ix = 0; ix < 4; ix++) {
425
13.5M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
13.5M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
13.5M
    }
428
3.38M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
847k
  block[0] = dcs[1];
431
4.23M
  for (size_t iy = 0; iy < 4; iy++) {
432
16.9M
    for (size_t ix = 0; ix < 4; ix++) {
433
13.5M
      if (ix == 0 && iy == 0) continue;
434
12.7M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
12.7M
    }
436
3.38M
  }
437
847k
  ComputeScaledIDCT<4, 4>()(
438
847k
      block,
439
847k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
847k
            pixels_stride),
441
847k
      scratch_space);
442
  // IDCT4x8.
443
847k
  block[0] = dcs[2];
444
4.23M
  for (size_t iy = 0; iy < 4; iy++) {
445
30.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
27.1M
      if (ix == 0 && iy == 0) continue;
447
26.2M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
26.2M
    }
449
3.38M
  }
450
847k
  ComputeScaledIDCT<4, 8>()(
451
847k
      block,
452
847k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
847k
      scratch_space);
454
847k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
15.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
15.1M
  size_t afv_x = afv_kind & 1;
404
15.1M
  size_t afv_y = afv_kind / 2;
405
15.1M
  float dcs[3] = {};
406
15.1M
  float block00 = coefficients[0];
407
15.1M
  float block01 = coefficients[1];
408
15.1M
  float block10 = coefficients[8];
409
15.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
15.1M
  dcs[1] = (block00 + block10 - block01);
411
15.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
15.1M
  HWY_ALIGN float coeff[4 * 4];
414
15.1M
  coeff[0] = dcs[0];
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      if (ix == 0 && iy == 0) continue;
418
227M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
227M
    }
420
60.6M
  }
421
15.1M
  HWY_ALIGN float block[4 * 8];
422
15.1M
  AFVIDCT4x4(coeff, block);
423
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
424
303M
    for (size_t ix = 0; ix < 4; ix++) {
425
242M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
242M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
242M
    }
428
60.6M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
15.1M
  block[0] = dcs[1];
431
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
432
303M
    for (size_t ix = 0; ix < 4; ix++) {
433
242M
      if (ix == 0 && iy == 0) continue;
434
227M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
227M
    }
436
60.6M
  }
437
15.1M
  ComputeScaledIDCT<4, 4>()(
438
15.1M
      block,
439
15.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
15.1M
            pixels_stride),
441
15.1M
      scratch_space);
442
  // IDCT4x8.
443
15.1M
  block[0] = dcs[2];
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      if (ix == 0 && iy == 0) continue;
447
470M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
470M
    }
449
60.6M
  }
450
15.1M
  ComputeScaledIDCT<4, 8>()(
451
15.1M
      block,
452
15.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
15.1M
      scratch_space);
454
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
15.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
15.1M
  size_t afv_x = afv_kind & 1;
404
15.1M
  size_t afv_y = afv_kind / 2;
405
15.1M
  float dcs[3] = {};
406
15.1M
  float block00 = coefficients[0];
407
15.1M
  float block01 = coefficients[1];
408
15.1M
  float block10 = coefficients[8];
409
15.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
15.1M
  dcs[1] = (block00 + block10 - block01);
411
15.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
15.1M
  HWY_ALIGN float coeff[4 * 4];
414
15.1M
  coeff[0] = dcs[0];
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      if (ix == 0 && iy == 0) continue;
418
227M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
227M
    }
420
60.6M
  }
421
15.1M
  HWY_ALIGN float block[4 * 8];
422
15.1M
  AFVIDCT4x4(coeff, block);
423
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
424
303M
    for (size_t ix = 0; ix < 4; ix++) {
425
242M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
242M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
242M
    }
428
60.6M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
15.1M
  block[0] = dcs[1];
431
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
432
303M
    for (size_t ix = 0; ix < 4; ix++) {
433
242M
      if (ix == 0 && iy == 0) continue;
434
227M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
227M
    }
436
60.6M
  }
437
15.1M
  ComputeScaledIDCT<4, 4>()(
438
15.1M
      block,
439
15.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
15.1M
            pixels_stride),
441
15.1M
      scratch_space);
442
  // IDCT4x8.
443
15.1M
  block[0] = dcs[2];
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      if (ix == 0 && iy == 0) continue;
447
470M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
470M
    }
449
60.6M
  }
450
15.1M
  ComputeScaledIDCT<4, 8>()(
451
15.1M
      block,
452
15.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
15.1M
      scratch_space);
454
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
15.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
15.1M
  size_t afv_x = afv_kind & 1;
404
15.1M
  size_t afv_y = afv_kind / 2;
405
15.1M
  float dcs[3] = {};
406
15.1M
  float block00 = coefficients[0];
407
15.1M
  float block01 = coefficients[1];
408
15.1M
  float block10 = coefficients[8];
409
15.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
15.1M
  dcs[1] = (block00 + block10 - block01);
411
15.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
15.1M
  HWY_ALIGN float coeff[4 * 4];
414
15.1M
  coeff[0] = dcs[0];
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      if (ix == 0 && iy == 0) continue;
418
227M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
227M
    }
420
60.6M
  }
421
15.1M
  HWY_ALIGN float block[4 * 8];
422
15.1M
  AFVIDCT4x4(coeff, block);
423
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
424
303M
    for (size_t ix = 0; ix < 4; ix++) {
425
242M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
242M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
242M
    }
428
60.6M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
15.1M
  block[0] = dcs[1];
431
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
432
303M
    for (size_t ix = 0; ix < 4; ix++) {
433
242M
      if (ix == 0 && iy == 0) continue;
434
227M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
227M
    }
436
60.6M
  }
437
15.1M
  ComputeScaledIDCT<4, 4>()(
438
15.1M
      block,
439
15.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
15.1M
            pixels_stride),
441
15.1M
      scratch_space);
442
  // IDCT4x8.
443
15.1M
  block[0] = dcs[2];
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      if (ix == 0 && iy == 0) continue;
447
470M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
470M
    }
449
60.6M
  }
450
15.1M
  ComputeScaledIDCT<4, 8>()(
451
15.1M
      block,
452
15.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
15.1M
      scratch_space);
454
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
15.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
15.1M
  size_t afv_x = afv_kind & 1;
404
15.1M
  size_t afv_y = afv_kind / 2;
405
15.1M
  float dcs[3] = {};
406
15.1M
  float block00 = coefficients[0];
407
15.1M
  float block01 = coefficients[1];
408
15.1M
  float block10 = coefficients[8];
409
15.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
15.1M
  dcs[1] = (block00 + block10 - block01);
411
15.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
15.1M
  HWY_ALIGN float coeff[4 * 4];
414
15.1M
  coeff[0] = dcs[0];
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      if (ix == 0 && iy == 0) continue;
418
227M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
227M
    }
420
60.6M
  }
421
15.1M
  HWY_ALIGN float block[4 * 8];
422
15.1M
  AFVIDCT4x4(coeff, block);
423
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
424
303M
    for (size_t ix = 0; ix < 4; ix++) {
425
242M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
242M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
242M
    }
428
60.6M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
15.1M
  block[0] = dcs[1];
431
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
432
303M
    for (size_t ix = 0; ix < 4; ix++) {
433
242M
      if (ix == 0 && iy == 0) continue;
434
227M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
227M
    }
436
60.6M
  }
437
15.1M
  ComputeScaledIDCT<4, 4>()(
438
15.1M
      block,
439
15.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
15.1M
            pixels_stride),
441
15.1M
      scratch_space);
442
  // IDCT4x8.
443
15.1M
  block[0] = dcs[2];
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      if (ix == 0 && iy == 0) continue;
447
470M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
470M
    }
449
60.6M
  }
450
15.1M
  ComputeScaledIDCT<4, 8>()(
451
15.1M
      block,
452
15.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
15.1M
      scratch_space);
454
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
217M
                                        float* scratch_space) {
461
217M
  using Type = AcStrategyType;
462
217M
  switch (strategy) {
463
18.4M
    case Type::IDENTITY: {
464
18.4M
      float dcs[4] = {};
465
18.4M
      float block00 = coefficients[0];
466
18.4M
      float block01 = coefficients[1];
467
18.4M
      float block10 = coefficients[8];
468
18.4M
      float block11 = coefficients[9];
469
18.4M
      dcs[0] = block00 + block01 + block10 + block11;
470
18.4M
      dcs[1] = block00 + block01 - block10 - block11;
471
18.4M
      dcs[2] = block00 - block01 + block10 - block11;
472
18.4M
      dcs[3] = block00 - block01 - block10 + block11;
473
55.2M
      for (size_t y = 0; y < 2; y++) {
474
110M
        for (size_t x = 0; x < 2; x++) {
475
73.6M
          float block_dc = dcs[y * 2 + x];
476
73.6M
          float residual_sum = 0;
477
368M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.47G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.17G
              if (ix == 0 && iy == 0) continue;
480
1.10G
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.10G
            }
482
294M
          }
483
73.6M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
73.6M
              block_dc - residual_sum * (1.0f / 16);
485
368M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.47G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.17G
              if (ix == 1 && iy == 1) continue;
488
1.10G
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.10G
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.10G
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.10G
            }
492
294M
          }
493
73.6M
          pixels[y * 4 * pixels_stride + x * 4] =
494
73.6M
              coefficients[(y + 2) * 8 + x + 2] +
495
73.6M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
73.6M
        }
497
36.8M
      }
498
18.4M
      break;
499
0
    }
500
16.6M
    case Type::DCT8X4: {
501
16.6M
      float dcs[2] = {};
502
16.6M
      float block0 = coefficients[0];
503
16.6M
      float block1 = coefficients[8];
504
16.6M
      dcs[0] = block0 + block1;
505
16.6M
      dcs[1] = block0 - block1;
506
49.9M
      for (size_t x = 0; x < 2; x++) {
507
33.2M
        HWY_ALIGN float block[4 * 8];
508
33.2M
        block[0] = dcs[x];
509
166M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.19G
          for (size_t ix = 0; ix < 8; ix++) {
511
1.06G
            if (ix == 0 && iy == 0) continue;
512
1.03G
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
1.03G
          }
514
133M
        }
515
33.2M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
33.2M
                                  scratch_space);
517
33.2M
      }
518
16.6M
      break;
519
0
    }
520
16.1M
    case Type::DCT4X8: {
521
16.1M
      float dcs[2] = {};
522
16.1M
      float block0 = coefficients[0];
523
16.1M
      float block1 = coefficients[8];
524
16.1M
      dcs[0] = block0 + block1;
525
16.1M
      dcs[1] = block0 - block1;
526
48.5M
      for (size_t y = 0; y < 2; y++) {
527
32.3M
        HWY_ALIGN float block[4 * 8];
528
32.3M
        block[0] = dcs[y];
529
161M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.16G
          for (size_t ix = 0; ix < 8; ix++) {
531
1.03G
            if (ix == 0 && iy == 0) continue;
532
1.00G
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
1.00G
          }
534
129M
        }
535
32.3M
        ComputeScaledIDCT<4, 8>()(
536
32.3M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
32.3M
            scratch_space);
538
32.3M
      }
539
16.1M
      break;
540
0
    }
541
15.2M
    case Type::DCT4X4: {
542
15.2M
      float dcs[4] = {};
543
15.2M
      float block00 = coefficients[0];
544
15.2M
      float block01 = coefficients[1];
545
15.2M
      float block10 = coefficients[8];
546
15.2M
      float block11 = coefficients[9];
547
15.2M
      dcs[0] = block00 + block01 + block10 + block11;
548
15.2M
      dcs[1] = block00 + block01 - block10 - block11;
549
15.2M
      dcs[2] = block00 - block01 + block10 - block11;
550
15.2M
      dcs[3] = block00 - block01 - block10 + block11;
551
45.6M
      for (size_t y = 0; y < 2; y++) {
552
91.2M
        for (size_t x = 0; x < 2; x++) {
553
60.8M
          HWY_ALIGN float block[4 * 4];
554
60.8M
          block[0] = dcs[y * 2 + x];
555
304M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
557
973M
              if (ix == 0 && iy == 0) continue;
558
912M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
912M
            }
560
243M
          }
561
60.8M
          ComputeScaledIDCT<4, 4>()(
562
60.8M
              block,
563
60.8M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
60.8M
              scratch_space);
565
60.8M
        }
566
30.4M
      }
567
15.2M
      break;
568
0
    }
569
18.3M
    case Type::DCT2X2: {
570
18.3M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
18.3M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
18.3M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
18.3M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
18.3M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
165M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.32G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.17G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.17G
        }
579
146M
      }
580
18.3M
      break;
581
0
    }
582
6.93M
    case Type::DCT16X16: {
583
6.93M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
6.93M
                                  scratch_space);
585
6.93M
      break;
586
0
    }
587
13.4M
    case Type::DCT16X8: {
588
13.4M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
13.4M
                                 scratch_space);
590
13.4M
      break;
591
0
    }
592
13.9M
    case Type::DCT8X16: {
593
13.9M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
13.9M
                                 scratch_space);
595
13.9M
      break;
596
0
    }
597
6.36k
    case Type::DCT32X8: {
598
6.36k
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
6.36k
                                 scratch_space);
600
6.36k
      break;
601
0
    }
602
7.75k
    case Type::DCT8X32: {
603
7.75k
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
7.75k
                                 scratch_space);
605
7.75k
      break;
606
0
    }
607
2.72M
    case Type::DCT32X16: {
608
2.72M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.72M
                                  scratch_space);
610
2.72M
      break;
611
0
    }
612
2.88M
    case Type::DCT16X32: {
613
2.88M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.88M
                                  scratch_space);
615
2.88M
      break;
616
0
    }
617
1.79M
    case Type::DCT32X32: {
618
1.79M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.79M
                                  scratch_space);
620
1.79M
      break;
621
0
    }
622
24.9M
    case Type::DCT: {
623
24.9M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
24.9M
                                scratch_space);
625
24.9M
      break;
626
0
    }
627
15.9M
    case Type::AFV0: {
628
15.9M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
15.9M
      break;
630
0
    }
631
16.4M
    case Type::AFV1: {
632
16.4M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
16.4M
      break;
634
0
    }
635
15.8M
    case Type::AFV2: {
636
15.8M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
15.8M
      break;
638
0
    }
639
16.0M
    case Type::AFV3: {
640
16.0M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
16.0M
      break;
642
0
    }
643
796k
    case Type::DCT64X32: {
644
796k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
796k
                                  scratch_space);
646
796k
      break;
647
0
    }
648
563k
    case Type::DCT32X64: {
649
563k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
563k
                                  scratch_space);
651
563k
      break;
652
0
    }
653
309k
    case Type::DCT64X64: {
654
309k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
309k
                                  scratch_space);
656
309k
      break;
657
0
    }
658
54
    case Type::DCT128X64: {
659
54
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
54
                                   scratch_space);
661
54
      break;
662
0
    }
663
12
    case Type::DCT64X128: {
664
12
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
12
                                   scratch_space);
666
12
      break;
667
0
    }
668
21
    case Type::DCT128X128: {
669
21
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
21
                                    scratch_space);
671
21
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
27
    case Type::DCT256X256: {
684
27
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
27
                                    scratch_space);
686
27
      break;
687
0
    }
688
217M
  }
689
217M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
26.6M
                                        float* scratch_space) {
461
26.6M
  using Type = AcStrategyType;
462
26.6M
  switch (strategy) {
463
3.24M
    case Type::IDENTITY: {
464
3.24M
      float dcs[4] = {};
465
3.24M
      float block00 = coefficients[0];
466
3.24M
      float block01 = coefficients[1];
467
3.24M
      float block10 = coefficients[8];
468
3.24M
      float block11 = coefficients[9];
469
3.24M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.24M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.24M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.24M
      dcs[3] = block00 - block01 - block10 + block11;
473
9.72M
      for (size_t y = 0; y < 2; y++) {
474
19.4M
        for (size_t x = 0; x < 2; x++) {
475
12.9M
          float block_dc = dcs[y * 2 + x];
476
12.9M
          float residual_sum = 0;
477
64.8M
          for (size_t iy = 0; iy < 4; iy++) {
478
259M
            for (size_t ix = 0; ix < 4; ix++) {
479
207M
              if (ix == 0 && iy == 0) continue;
480
194M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
194M
            }
482
51.8M
          }
483
12.9M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
12.9M
              block_dc - residual_sum * (1.0f / 16);
485
64.8M
          for (size_t iy = 0; iy < 4; iy++) {
486
259M
            for (size_t ix = 0; ix < 4; ix++) {
487
207M
              if (ix == 1 && iy == 1) continue;
488
194M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
194M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
194M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
194M
            }
492
51.8M
          }
493
12.9M
          pixels[y * 4 * pixels_stride + x * 4] =
494
12.9M
              coefficients[(y + 2) * 8 + x + 2] +
495
12.9M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
12.9M
        }
497
6.48M
      }
498
3.24M
      break;
499
0
    }
500
1.47M
    case Type::DCT8X4: {
501
1.47M
      float dcs[2] = {};
502
1.47M
      float block0 = coefficients[0];
503
1.47M
      float block1 = coefficients[8];
504
1.47M
      dcs[0] = block0 + block1;
505
1.47M
      dcs[1] = block0 - block1;
506
4.43M
      for (size_t x = 0; x < 2; x++) {
507
2.95M
        HWY_ALIGN float block[4 * 8];
508
2.95M
        block[0] = dcs[x];
509
14.7M
        for (size_t iy = 0; iy < 4; iy++) {
510
106M
          for (size_t ix = 0; ix < 8; ix++) {
511
94.6M
            if (ix == 0 && iy == 0) continue;
512
91.7M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
91.7M
          }
514
11.8M
        }
515
2.95M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.95M
                                  scratch_space);
517
2.95M
      }
518
1.47M
      break;
519
0
    }
520
1.03M
    case Type::DCT4X8: {
521
1.03M
      float dcs[2] = {};
522
1.03M
      float block0 = coefficients[0];
523
1.03M
      float block1 = coefficients[8];
524
1.03M
      dcs[0] = block0 + block1;
525
1.03M
      dcs[1] = block0 - block1;
526
3.09M
      for (size_t y = 0; y < 2; y++) {
527
2.06M
        HWY_ALIGN float block[4 * 8];
528
2.06M
        block[0] = dcs[y];
529
10.3M
        for (size_t iy = 0; iy < 4; iy++) {
530
74.2M
          for (size_t ix = 0; ix < 8; ix++) {
531
66.0M
            if (ix == 0 && iy == 0) continue;
532
63.9M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
63.9M
          }
534
8.25M
        }
535
2.06M
        ComputeScaledIDCT<4, 8>()(
536
2.06M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.06M
            scratch_space);
538
2.06M
      }
539
1.03M
      break;
540
0
    }
541
50.7k
    case Type::DCT4X4: {
542
50.7k
      float dcs[4] = {};
543
50.7k
      float block00 = coefficients[0];
544
50.7k
      float block01 = coefficients[1];
545
50.7k
      float block10 = coefficients[8];
546
50.7k
      float block11 = coefficients[9];
547
50.7k
      dcs[0] = block00 + block01 + block10 + block11;
548
50.7k
      dcs[1] = block00 + block01 - block10 - block11;
549
50.7k
      dcs[2] = block00 - block01 + block10 - block11;
550
50.7k
      dcs[3] = block00 - block01 - block10 + block11;
551
152k
      for (size_t y = 0; y < 2; y++) {
552
304k
        for (size_t x = 0; x < 2; x++) {
553
202k
          HWY_ALIGN float block[4 * 4];
554
202k
          block[0] = dcs[y * 2 + x];
555
1.01M
          for (size_t iy = 0; iy < 4; iy++) {
556
4.05M
            for (size_t ix = 0; ix < 4; ix++) {
557
3.24M
              if (ix == 0 && iy == 0) continue;
558
3.04M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
3.04M
            }
560
811k
          }
561
202k
          ComputeScaledIDCT<4, 4>()(
562
202k
              block,
563
202k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
202k
              scratch_space);
565
202k
        }
566
101k
      }
567
50.7k
      break;
568
0
    }
569
3.18M
    case Type::DCT2X2: {
570
3.18M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
3.18M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
3.18M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
3.18M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
3.18M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
28.7M
      for (size_t y = 0; y < kBlockDim; y++) {
576
229M
        for (size_t x = 0; x < kBlockDim; x++) {
577
204M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
204M
        }
579
25.5M
      }
580
3.18M
      break;
581
0
    }
582
551k
    case Type::DCT16X16: {
583
551k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
551k
                                  scratch_space);
585
551k
      break;
586
0
    }
587
972k
    case Type::DCT16X8: {
588
972k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
972k
                                 scratch_space);
590
972k
      break;
591
0
    }
592
1.41M
    case Type::DCT8X16: {
593
1.41M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
1.41M
                                 scratch_space);
595
1.41M
      break;
596
0
    }
597
6.36k
    case Type::DCT32X8: {
598
6.36k
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
6.36k
                                 scratch_space);
600
6.36k
      break;
601
0
    }
602
7.75k
    case Type::DCT8X32: {
603
7.75k
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
7.75k
                                 scratch_space);
605
7.75k
      break;
606
0
    }
607
258k
    case Type::DCT32X16: {
608
258k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
258k
                                  scratch_space);
610
258k
      break;
611
0
    }
612
399k
    case Type::DCT16X32: {
613
399k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
399k
                                  scratch_space);
615
399k
      break;
616
0
    }
617
531k
    case Type::DCT32X32: {
618
531k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
531k
                                  scratch_space);
620
531k
      break;
621
0
    }
622
9.75M
    case Type::DCT: {
623
9.75M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
9.75M
                                scratch_space);
625
9.75M
      break;
626
0
    }
627
775k
    case Type::AFV0: {
628
775k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
775k
      break;
630
0
    }
631
1.26M
    case Type::AFV1: {
632
1.26M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
1.26M
      break;
634
0
    }
635
693k
    case Type::AFV2: {
636
693k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
693k
      break;
638
0
    }
639
847k
    case Type::AFV3: {
640
847k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
847k
      break;
642
0
    }
643
18.1k
    case Type::DCT64X32: {
644
18.1k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
18.1k
                                  scratch_space);
646
18.1k
      break;
647
0
    }
648
30.4k
    case Type::DCT32X64: {
649
30.4k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
30.4k
                                  scratch_space);
651
30.4k
      break;
652
0
    }
653
104k
    case Type::DCT64X64: {
654
104k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
104k
                                  scratch_space);
656
104k
      break;
657
0
    }
658
54
    case Type::DCT128X64: {
659
54
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
54
                                   scratch_space);
661
54
      break;
662
0
    }
663
12
    case Type::DCT64X128: {
664
12
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
12
                                   scratch_space);
666
12
      break;
667
0
    }
668
21
    case Type::DCT128X128: {
669
21
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
21
                                    scratch_space);
671
21
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
27
    case Type::DCT256X256: {
684
27
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
27
                                    scratch_space);
686
27
      break;
687
0
    }
688
26.6M
  }
689
26.6M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
190M
                                        float* scratch_space) {
461
190M
  using Type = AcStrategyType;
462
190M
  switch (strategy) {
463
15.1M
    case Type::IDENTITY: {
464
15.1M
      float dcs[4] = {};
465
15.1M
      float block00 = coefficients[0];
466
15.1M
      float block01 = coefficients[1];
467
15.1M
      float block10 = coefficients[8];
468
15.1M
      float block11 = coefficients[9];
469
15.1M
      dcs[0] = block00 + block01 + block10 + block11;
470
15.1M
      dcs[1] = block00 + block01 - block10 - block11;
471
15.1M
      dcs[2] = block00 - block01 + block10 - block11;
472
15.1M
      dcs[3] = block00 - block01 - block10 + block11;
473
45.4M
      for (size_t y = 0; y < 2; y++) {
474
90.9M
        for (size_t x = 0; x < 2; x++) {
475
60.6M
          float block_dc = dcs[y * 2 + x];
476
60.6M
          float residual_sum = 0;
477
303M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
479
970M
              if (ix == 0 && iy == 0) continue;
480
909M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
909M
            }
482
242M
          }
483
60.6M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
60.6M
              block_dc - residual_sum * (1.0f / 16);
485
303M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
487
970M
              if (ix == 1 && iy == 1) continue;
488
909M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
909M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
909M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
909M
            }
492
242M
          }
493
60.6M
          pixels[y * 4 * pixels_stride + x * 4] =
494
60.6M
              coefficients[(y + 2) * 8 + x + 2] +
495
60.6M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
60.6M
        }
497
30.3M
      }
498
15.1M
      break;
499
0
    }
500
15.1M
    case Type::DCT8X4: {
501
15.1M
      float dcs[2] = {};
502
15.1M
      float block0 = coefficients[0];
503
15.1M
      float block1 = coefficients[8];
504
15.1M
      dcs[0] = block0 + block1;
505
15.1M
      dcs[1] = block0 - block1;
506
45.4M
      for (size_t x = 0; x < 2; x++) {
507
30.3M
        HWY_ALIGN float block[4 * 8];
508
30.3M
        block[0] = dcs[x];
509
151M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.09G
          for (size_t ix = 0; ix < 8; ix++) {
511
970M
            if (ix == 0 && iy == 0) continue;
512
940M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
940M
          }
514
121M
        }
515
30.3M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
30.3M
                                  scratch_space);
517
30.3M
      }
518
15.1M
      break;
519
0
    }
520
15.1M
    case Type::DCT4X8: {
521
15.1M
      float dcs[2] = {};
522
15.1M
      float block0 = coefficients[0];
523
15.1M
      float block1 = coefficients[8];
524
15.1M
      dcs[0] = block0 + block1;
525
15.1M
      dcs[1] = block0 - block1;
526
45.4M
      for (size_t y = 0; y < 2; y++) {
527
30.3M
        HWY_ALIGN float block[4 * 8];
528
30.3M
        block[0] = dcs[y];
529
151M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.09G
          for (size_t ix = 0; ix < 8; ix++) {
531
970M
            if (ix == 0 && iy == 0) continue;
532
940M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
940M
          }
534
121M
        }
535
30.3M
        ComputeScaledIDCT<4, 8>()(
536
30.3M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
30.3M
            scratch_space);
538
30.3M
      }
539
15.1M
      break;
540
0
    }
541
15.1M
    case Type::DCT4X4: {
542
15.1M
      float dcs[4] = {};
543
15.1M
      float block00 = coefficients[0];
544
15.1M
      float block01 = coefficients[1];
545
15.1M
      float block10 = coefficients[8];
546
15.1M
      float block11 = coefficients[9];
547
15.1M
      dcs[0] = block00 + block01 + block10 + block11;
548
15.1M
      dcs[1] = block00 + block01 - block10 - block11;
549
15.1M
      dcs[2] = block00 - block01 + block10 - block11;
550
15.1M
      dcs[3] = block00 - block01 - block10 + block11;
551
45.4M
      for (size_t y = 0; y < 2; y++) {
552
90.9M
        for (size_t x = 0; x < 2; x++) {
553
60.6M
          HWY_ALIGN float block[4 * 4];
554
60.6M
          block[0] = dcs[y * 2 + x];
555
303M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
557
970M
              if (ix == 0 && iy == 0) continue;
558
909M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
909M
            }
560
242M
          }
561
60.6M
          ComputeScaledIDCT<4, 4>()(
562
60.6M
              block,
563
60.6M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
60.6M
              scratch_space);
565
60.6M
        }
566
30.3M
      }
567
15.1M
      break;
568
0
    }
569
15.1M
    case Type::DCT2X2: {
570
15.1M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
15.1M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
15.1M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
15.1M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
15.1M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
136M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.09G
        for (size_t x = 0; x < kBlockDim; x++) {
577
970M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
970M
        }
579
121M
      }
580
15.1M
      break;
581
0
    }
582
6.38M
    case Type::DCT16X16: {
583
6.38M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
6.38M
                                  scratch_space);
585
6.38M
      break;
586
0
    }
587
12.4M
    case Type::DCT16X8: {
588
12.4M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
12.4M
                                 scratch_space);
590
12.4M
      break;
591
0
    }
592
12.5M
    case Type::DCT8X16: {
593
12.5M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
12.5M
                                 scratch_space);
595
12.5M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
2.46M
    case Type::DCT32X16: {
608
2.46M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.46M
                                  scratch_space);
610
2.46M
      break;
611
0
    }
612
2.48M
    case Type::DCT16X32: {
613
2.48M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.48M
                                  scratch_space);
615
2.48M
      break;
616
0
    }
617
1.26M
    case Type::DCT32X32: {
618
1.26M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.26M
                                  scratch_space);
620
1.26M
      break;
621
0
    }
622
15.1M
    case Type::DCT: {
623
15.1M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
15.1M
                                scratch_space);
625
15.1M
      break;
626
0
    }
627
15.1M
    case Type::AFV0: {
628
15.1M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
15.1M
      break;
630
0
    }
631
15.1M
    case Type::AFV1: {
632
15.1M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
15.1M
      break;
634
0
    }
635
15.1M
    case Type::AFV2: {
636
15.1M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
15.1M
      break;
638
0
    }
639
15.1M
    case Type::AFV3: {
640
15.1M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
15.1M
      break;
642
0
    }
643
778k
    case Type::DCT64X32: {
644
778k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
778k
                                  scratch_space);
646
778k
      break;
647
0
    }
648
533k
    case Type::DCT32X64: {
649
533k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
533k
                                  scratch_space);
651
533k
      break;
652
0
    }
653
205k
    case Type::DCT64X64: {
654
205k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
205k
                                  scratch_space);
656
205k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
190M
  }
689
190M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
26.6M
                                              float* JXL_RESTRICT scratch) {
695
26.6M
  using Type = AcStrategyType;
696
26.6M
  HWY_ALIGN float warm_block[4 * 4];
697
26.6M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
26.6M
  switch (strategy) {
699
972k
    case Type::DCT16X8: {
700
972k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
972k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
972k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
972k
      break;
704
0
    }
705
1.41M
    case Type::DCT8X16: {
706
1.41M
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
1.41M
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
1.41M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
1.41M
      break;
710
0
    }
711
551k
    case Type::DCT16X16: {
712
551k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
551k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
551k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
551k
      break;
716
0
    }
717
6.36k
    case Type::DCT32X8: {
718
6.36k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
6.36k
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
6.36k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
6.36k
      break;
722
0
    }
723
7.75k
    case Type::DCT8X32: {
724
7.75k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
7.75k
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
7.75k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
7.75k
      break;
728
0
    }
729
258k
    case Type::DCT32X16: {
730
258k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
258k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
258k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
258k
      break;
734
0
    }
735
399k
    case Type::DCT16X32: {
736
399k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
399k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
399k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
399k
      break;
740
0
    }
741
531k
    case Type::DCT32X32: {
742
531k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
531k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
531k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
531k
      break;
746
0
    }
747
18.1k
    case Type::DCT64X32: {
748
18.1k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
18.1k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
18.1k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
18.1k
      break;
752
0
    }
753
30.4k
    case Type::DCT32X64: {
754
30.4k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
30.4k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
30.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
30.4k
      break;
758
0
    }
759
104k
    case Type::DCT64X64: {
760
104k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
104k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
104k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
104k
      break;
764
0
    }
765
54
    case Type::DCT128X64: {
766
54
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
54
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
54
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
54
      break;
770
0
    }
771
12
    case Type::DCT64X128: {
772
12
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
12
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
12
      break;
776
0
    }
777
21
    case Type::DCT128X128: {
778
21
      ReinterpretingDCT<
779
21
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
21
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
21
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
21
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
27
    case Type::DCT256X256: {
799
27
      ReinterpretingDCT<
800
27
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
27
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
27
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
27
      break;
804
0
    }
805
9.75M
    case Type::DCT:
806
12.9M
    case Type::DCT2X2:
807
12.9M
    case Type::DCT4X4:
808
14.0M
    case Type::DCT4X8:
809
15.5M
    case Type::DCT8X4:
810
16.2M
    case Type::AFV0:
811
17.5M
    case Type::AFV1:
812
18.2M
    case Type::AFV2:
813
19.0M
    case Type::AFV3:
814
22.3M
    case Type::IDENTITY:
815
22.3M
      llf[0] = dc[0];
816
22.3M
      break;
817
26.6M
  };
818
26.6M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
26.6M
                                              float* JXL_RESTRICT scratch) {
695
26.6M
  using Type = AcStrategyType;
696
26.6M
  HWY_ALIGN float warm_block[4 * 4];
697
26.6M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
26.6M
  switch (strategy) {
699
972k
    case Type::DCT16X8: {
700
972k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
972k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
972k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
972k
      break;
704
0
    }
705
1.41M
    case Type::DCT8X16: {
706
1.41M
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
1.41M
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
1.41M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
1.41M
      break;
710
0
    }
711
551k
    case Type::DCT16X16: {
712
551k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
551k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
551k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
551k
      break;
716
0
    }
717
6.36k
    case Type::DCT32X8: {
718
6.36k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
6.36k
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
6.36k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
6.36k
      break;
722
0
    }
723
7.75k
    case Type::DCT8X32: {
724
7.75k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
7.75k
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
7.75k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
7.75k
      break;
728
0
    }
729
258k
    case Type::DCT32X16: {
730
258k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
258k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
258k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
258k
      break;
734
0
    }
735
399k
    case Type::DCT16X32: {
736
399k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
399k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
399k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
399k
      break;
740
0
    }
741
531k
    case Type::DCT32X32: {
742
531k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
531k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
531k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
531k
      break;
746
0
    }
747
18.1k
    case Type::DCT64X32: {
748
18.1k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
18.1k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
18.1k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
18.1k
      break;
752
0
    }
753
30.4k
    case Type::DCT32X64: {
754
30.4k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
30.4k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
30.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
30.4k
      break;
758
0
    }
759
104k
    case Type::DCT64X64: {
760
104k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
104k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
104k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
104k
      break;
764
0
    }
765
54
    case Type::DCT128X64: {
766
54
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
54
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
54
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
54
      break;
770
0
    }
771
12
    case Type::DCT64X128: {
772
12
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
12
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
12
      break;
776
0
    }
777
21
    case Type::DCT128X128: {
778
21
      ReinterpretingDCT<
779
21
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
21
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
21
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
21
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
27
    case Type::DCT256X256: {
799
27
      ReinterpretingDCT<
800
27
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
27
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
27
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
27
      break;
804
0
    }
805
9.75M
    case Type::DCT:
806
12.9M
    case Type::DCT2X2:
807
12.9M
    case Type::DCT4X4:
808
14.0M
    case Type::DCT4X8:
809
15.5M
    case Type::DCT8X4:
810
16.2M
    case Type::AFV0:
811
17.5M
    case Type::AFV1:
812
18.2M
    case Type::AFV2:
813
19.0M
    case Type::AFV3:
814
22.3M
    case Type::IDENTITY:
815
22.3M
      llf[0] = dc[0];
816
22.3M
      break;
817
26.6M
  };
818
26.6M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_