Coverage Report

Created: 2026-01-20 07:37

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
5.15M
                                  float* JXL_RESTRICT scratch_space) {
41
5.15M
  static_assert(LF_ROWS == ROWS,
42
5.15M
                "ReinterpretingDCT should only be called with LF == N");
43
5.15M
  static_assert(LF_COLS == COLS,
44
5.15M
                "ReinterpretingDCT should only be called with LF == N");
45
5.15M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
5.15M
                                 scratch_space);
47
5.15M
  if (ROWS < COLS) {
48
5.05M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
11.3M
      for (size_t x = 0; x < LF_COLS; x++) {
50
8.49M
        output[y * output_stride + x] =
51
8.49M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
8.49M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
8.49M
      }
54
2.82M
    }
55
2.92M
  } else {
56
10.0M
    for (size_t y = 0; y < LF_COLS; y++) {
57
35.5M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
28.4M
        output[y * output_stride + x] =
59
28.4M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
28.4M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
28.4M
      }
62
7.11M
    }
63
2.92M
  }
64
5.15M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
1.09M
                                  float* JXL_RESTRICT scratch_space) {
41
1.09M
  static_assert(LF_ROWS == ROWS,
42
1.09M
                "ReinterpretingDCT should only be called with LF == N");
43
1.09M
  static_assert(LF_COLS == COLS,
44
1.09M
                "ReinterpretingDCT should only be called with LF == N");
45
1.09M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
1.09M
                                 scratch_space);
47
1.09M
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
1.09M
  } else {
56
2.18M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.28M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.18M
        output[y * output_stride + x] =
59
2.18M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.18M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.18M
      }
62
1.09M
    }
63
1.09M
  }
64
1.09M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
1.69M
                                  float* JXL_RESTRICT scratch_space) {
41
1.69M
  static_assert(LF_ROWS == ROWS,
42
1.69M
                "ReinterpretingDCT should only be called with LF == N");
43
1.69M
  static_assert(LF_COLS == COLS,
44
1.69M
                "ReinterpretingDCT should only be called with LF == N");
45
1.69M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
1.69M
                                 scratch_space);
47
1.69M
  if (ROWS < COLS) {
48
3.38M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
5.07M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.38M
        output[y * output_stride + x] =
51
3.38M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.38M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.38M
      }
54
1.69M
    }
55
1.69M
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
1.69M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
608k
                                  float* JXL_RESTRICT scratch_space) {
41
608k
  static_assert(LF_ROWS == ROWS,
42
608k
                "ReinterpretingDCT should only be called with LF == N");
43
608k
  static_assert(LF_COLS == COLS,
44
608k
                "ReinterpretingDCT should only be called with LF == N");
45
608k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
608k
                                 scratch_space);
47
608k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
608k
  } else {
56
1.82M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.65M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.43M
        output[y * output_stride + x] =
59
2.43M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.43M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.43M
      }
62
1.21M
    }
63
608k
  }
64
608k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
4.82k
                                  float* JXL_RESTRICT scratch_space) {
41
4.82k
  static_assert(LF_ROWS == ROWS,
42
4.82k
                "ReinterpretingDCT should only be called with LF == N");
43
4.82k
  static_assert(LF_COLS == COLS,
44
4.82k
                "ReinterpretingDCT should only be called with LF == N");
45
4.82k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
4.82k
                                 scratch_space);
47
4.82k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
4.82k
  } else {
56
9.65k
    for (size_t y = 0; y < LF_COLS; y++) {
57
24.1k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
19.3k
        output[y * output_stride + x] =
59
19.3k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
19.3k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
19.3k
      }
62
4.82k
    }
63
4.82k
  }
64
4.82k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
7.75k
                                  float* JXL_RESTRICT scratch_space) {
41
7.75k
  static_assert(LF_ROWS == ROWS,
42
7.75k
                "ReinterpretingDCT should only be called with LF == N");
43
7.75k
  static_assert(LF_COLS == COLS,
44
7.75k
                "ReinterpretingDCT should only be called with LF == N");
45
7.75k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
7.75k
                                 scratch_space);
47
7.75k
  if (ROWS < COLS) {
48
15.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
38.7k
      for (size_t x = 0; x < LF_COLS; x++) {
50
31.0k
        output[y * output_stride + x] =
51
31.0k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
31.0k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
31.0k
      }
54
7.75k
    }
55
7.75k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
7.75k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
309k
                                  float* JXL_RESTRICT scratch_space) {
41
309k
  static_assert(LF_ROWS == ROWS,
42
309k
                "ReinterpretingDCT should only be called with LF == N");
43
309k
  static_assert(LF_COLS == COLS,
44
309k
                "ReinterpretingDCT should only be called with LF == N");
45
309k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
309k
                                 scratch_space);
47
309k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
309k
  } else {
56
929k
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.09M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.47M
        output[y * output_stride + x] =
59
2.47M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.47M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.47M
      }
62
619k
    }
63
309k
  }
64
309k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
494k
                                  float* JXL_RESTRICT scratch_space) {
41
494k
  static_assert(LF_ROWS == ROWS,
42
494k
                "ReinterpretingDCT should only be called with LF == N");
43
494k
  static_assert(LF_COLS == COLS,
44
494k
                "ReinterpretingDCT should only be called with LF == N");
45
494k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
494k
                                 scratch_space);
47
494k
  if (ROWS < COLS) {
48
1.48M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.94M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.95M
        output[y * output_stride + x] =
51
3.95M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.95M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.95M
      }
54
988k
    }
55
494k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
494k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
756k
                                  float* JXL_RESTRICT scratch_space) {
41
756k
  static_assert(LF_ROWS == ROWS,
42
756k
                "ReinterpretingDCT should only be called with LF == N");
43
756k
  static_assert(LF_COLS == COLS,
44
756k
                "ReinterpretingDCT should only be called with LF == N");
45
756k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
756k
                                 scratch_space);
47
756k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
756k
  } else {
56
3.78M
    for (size_t y = 0; y < LF_COLS; y++) {
57
15.1M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
12.1M
        output[y * output_stride + x] =
59
12.1M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
12.1M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
12.1M
      }
62
3.02M
    }
63
756k
  }
64
756k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
20.9k
                                  float* JXL_RESTRICT scratch_space) {
41
20.9k
  static_assert(LF_ROWS == ROWS,
42
20.9k
                "ReinterpretingDCT should only be called with LF == N");
43
20.9k
  static_assert(LF_COLS == COLS,
44
20.9k
                "ReinterpretingDCT should only be called with LF == N");
45
20.9k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
20.9k
                                 scratch_space);
47
20.9k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
20.9k
  } else {
56
104k
    for (size_t y = 0; y < LF_COLS; y++) {
57
753k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
669k
        output[y * output_stride + x] =
59
669k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
669k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
669k
      }
62
83.6k
    }
63
20.9k
  }
64
20.9k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
35.1k
                                  float* JXL_RESTRICT scratch_space) {
41
35.1k
  static_assert(LF_ROWS == ROWS,
42
35.1k
                "ReinterpretingDCT should only be called with LF == N");
43
35.1k
  static_assert(LF_COLS == COLS,
44
35.1k
                "ReinterpretingDCT should only be called with LF == N");
45
35.1k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
35.1k
                                 scratch_space);
47
35.1k
  if (ROWS < COLS) {
48
175k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.26M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.12M
        output[y * output_stride + x] =
51
1.12M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.12M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.12M
      }
54
140k
    }
55
35.1k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
35.1k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
132k
                                  float* JXL_RESTRICT scratch_space) {
41
132k
  static_assert(LF_ROWS == ROWS,
42
132k
                "ReinterpretingDCT should only be called with LF == N");
43
132k
  static_assert(LF_COLS == COLS,
44
132k
                "ReinterpretingDCT should only be called with LF == N");
45
132k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
132k
                                 scratch_space);
47
132k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
132k
  } else {
56
1.19M
    for (size_t y = 0; y < LF_COLS; y++) {
57
9.56M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
8.50M
        output[y * output_stride + x] =
59
8.50M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
8.50M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
8.50M
      }
62
1.06M
    }
63
132k
  }
64
132k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
54
                                  float* JXL_RESTRICT scratch_space) {
41
54
  static_assert(LF_ROWS == ROWS,
42
54
                "ReinterpretingDCT should only be called with LF == N");
43
54
  static_assert(LF_COLS == COLS,
44
54
                "ReinterpretingDCT should only be called with LF == N");
45
54
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
54
                                 scratch_space);
47
54
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
54
  } else {
56
486
    for (size_t y = 0; y < LF_COLS; y++) {
57
7.34k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.91k
        output[y * output_stride + x] =
59
6.91k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.91k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.91k
      }
62
432
    }
63
54
  }
64
54
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
12
                                  float* JXL_RESTRICT scratch_space) {
41
12
  static_assert(LF_ROWS == ROWS,
42
12
                "ReinterpretingDCT should only be called with LF == N");
43
12
  static_assert(LF_COLS == COLS,
44
12
                "ReinterpretingDCT should only be called with LF == N");
45
12
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
12
                                 scratch_space);
47
12
  if (ROWS < COLS) {
48
108
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.63k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.53k
        output[y * output_stride + x] =
51
1.53k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.53k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.53k
      }
54
96
    }
55
12
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
12
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
24
                                  float* JXL_RESTRICT scratch_space) {
41
24
  static_assert(LF_ROWS == ROWS,
42
24
                "ReinterpretingDCT should only be called with LF == N");
43
24
  static_assert(LF_COLS == COLS,
44
24
                "ReinterpretingDCT should only be called with LF == N");
45
24
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
24
                                 scratch_space);
47
24
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
24
  } else {
56
408
    for (size_t y = 0; y < LF_COLS; y++) {
57
6.52k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.14k
        output[y * output_stride + x] =
59
6.14k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.14k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.14k
      }
62
384
    }
63
24
  }
64
24
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
30
                                  float* JXL_RESTRICT scratch_space) {
41
30
  static_assert(LF_ROWS == ROWS,
42
30
                "ReinterpretingDCT should only be called with LF == N");
43
30
  static_assert(LF_COLS == COLS,
44
30
                "ReinterpretingDCT should only be called with LF == N");
45
30
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
30
                                 scratch_space);
47
30
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
30
  } else {
56
990
    for (size_t y = 0; y < LF_COLS; y++) {
57
31.6k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
30.7k
        output[y * output_stride + x] =
59
30.7k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
30.7k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
30.7k
      }
62
960
    }
63
30
  }
64
30
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
67.1M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
67.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
67.1M
  static_assert(S % 2 == 0, "S should be even");
70
67.1M
  float temp[kDCTBlockSize];
71
67.1M
  constexpr size_t num_2x2 = S / 2;
72
223M
  for (size_t y = 0; y < num_2x2; y++) {
73
626M
    for (size_t x = 0; x < num_2x2; x++) {
74
470M
      float c00 = block[y * kBlockDim + x];
75
470M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
470M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
470M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
470M
      float r00 = c00 + c01 + c10 + c11;
79
470M
      float r01 = c00 + c01 - c10 - c11;
80
470M
      float r10 = c00 - c01 + c10 - c11;
81
470M
      float r11 = c00 - c01 - c10 + c11;
82
470M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
470M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
470M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
470M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
470M
    }
87
156M
  }
88
380M
  for (size_t y = 0; y < S; y++) {
89
2.19G
    for (size_t x = 0; x < S; x++) {
90
1.88G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.88G
    }
92
313M
  }
93
67.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.56M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.56M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.56M
  static_assert(S % 2 == 0, "S should be even");
70
3.56M
  float temp[kDCTBlockSize];
71
3.56M
  constexpr size_t num_2x2 = S / 2;
72
7.13M
  for (size_t y = 0; y < num_2x2; y++) {
73
7.13M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.56M
      float c00 = block[y * kBlockDim + x];
75
3.56M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
3.56M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
3.56M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
3.56M
      float r00 = c00 + c01 + c10 + c11;
79
3.56M
      float r01 = c00 + c01 - c10 - c11;
80
3.56M
      float r10 = c00 - c01 + c10 - c11;
81
3.56M
      float r11 = c00 - c01 - c10 + c11;
82
3.56M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
3.56M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
3.56M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
3.56M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
3.56M
    }
87
3.56M
  }
88
10.6M
  for (size_t y = 0; y < S; y++) {
89
21.3M
    for (size_t x = 0; x < S; x++) {
90
14.2M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
14.2M
    }
92
7.13M
  }
93
3.56M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.56M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.56M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.56M
  static_assert(S % 2 == 0, "S should be even");
70
3.56M
  float temp[kDCTBlockSize];
71
3.56M
  constexpr size_t num_2x2 = S / 2;
72
10.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
21.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.2M
      float c00 = block[y * kBlockDim + x];
75
14.2M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
14.2M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
14.2M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
14.2M
      float r00 = c00 + c01 + c10 + c11;
79
14.2M
      float r01 = c00 + c01 - c10 - c11;
80
14.2M
      float r10 = c00 - c01 + c10 - c11;
81
14.2M
      float r11 = c00 - c01 - c10 + c11;
82
14.2M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
14.2M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
14.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
14.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
14.2M
    }
87
7.13M
  }
88
17.8M
  for (size_t y = 0; y < S; y++) {
89
71.3M
    for (size_t x = 0; x < S; x++) {
90
57.0M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
57.0M
    }
92
14.2M
  }
93
3.56M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
3.56M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
3.56M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.56M
  static_assert(S % 2 == 0, "S should be even");
70
3.56M
  float temp[kDCTBlockSize];
71
3.56M
  constexpr size_t num_2x2 = S / 2;
72
17.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
71.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
57.0M
      float c00 = block[y * kBlockDim + x];
75
57.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
57.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
57.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
57.0M
      float r00 = c00 + c01 + c10 + c11;
79
57.0M
      float r01 = c00 + c01 - c10 - c11;
80
57.0M
      float r10 = c00 - c01 + c10 - c11;
81
57.0M
      float r11 = c00 - c01 - c10 + c11;
82
57.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
57.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
57.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
57.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
57.0M
    }
87
14.2M
  }
88
32.0M
  for (size_t y = 0; y < S; y++) {
89
256M
    for (size_t x = 0; x < S; x++) {
90
228M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
228M
    }
92
28.5M
  }
93
3.56M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
37.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.8M
      float c00 = block[y * kBlockDim + x];
75
18.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
18.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
18.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
18.8M
      float r00 = c00 + c01 + c10 + c11;
79
18.8M
      float r01 = c00 + c01 - c10 - c11;
80
18.8M
      float r10 = c00 - c01 + c10 - c11;
81
18.8M
      float r11 = c00 - c01 - c10 + c11;
82
18.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
18.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
18.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
18.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
18.8M
    }
87
18.8M
  }
88
56.4M
  for (size_t y = 0; y < S; y++) {
89
112M
    for (size_t x = 0; x < S; x++) {
90
75.2M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
75.2M
    }
92
37.6M
  }
93
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
56.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
112M
    for (size_t x = 0; x < num_2x2; x++) {
74
75.2M
      float c00 = block[y * kBlockDim + x];
75
75.2M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
75.2M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
75.2M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
75.2M
      float r00 = c00 + c01 + c10 + c11;
79
75.2M
      float r01 = c00 + c01 - c10 - c11;
80
75.2M
      float r10 = c00 - c01 + c10 - c11;
81
75.2M
      float r11 = c00 - c01 - c10 + c11;
82
75.2M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
75.2M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
75.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
75.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
75.2M
    }
87
37.6M
  }
88
94.0M
  for (size_t y = 0; y < S; y++) {
89
376M
    for (size_t x = 0; x < S; x++) {
90
301M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
301M
    }
92
75.2M
  }
93
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
18.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
18.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
18.8M
  static_assert(S % 2 == 0, "S should be even");
70
18.8M
  float temp[kDCTBlockSize];
71
18.8M
  constexpr size_t num_2x2 = S / 2;
72
94.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
376M
    for (size_t x = 0; x < num_2x2; x++) {
74
301M
      float c00 = block[y * kBlockDim + x];
75
301M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
301M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
301M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
301M
      float r00 = c00 + c01 + c10 + c11;
79
301M
      float r01 = c00 + c01 - c10 - c11;
80
301M
      float r10 = c00 - c01 + c10 - c11;
81
301M
      float r11 = c00 - c01 - c10 + c11;
82
301M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
301M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
301M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
301M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
301M
    }
87
75.2M
  }
88
169M
  for (size_t y = 0; y < S; y++) {
89
1.35G
    for (size_t x = 0; x < S; x++) {
90
1.20G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.20G
    }
92
150M
  }
93
18.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
80.4M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
80.4M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
80.4M
      {
98
80.4M
          0.25,
99
80.4M
          0.25,
100
80.4M
          0.25,
101
80.4M
          0.25,
102
80.4M
          0.25,
103
80.4M
          0.25,
104
80.4M
          0.25,
105
80.4M
          0.25,
106
80.4M
          0.25,
107
80.4M
          0.25,
108
80.4M
          0.25,
109
80.4M
          0.25,
110
80.4M
          0.25,
111
80.4M
          0.25,
112
80.4M
          0.25,
113
80.4M
          0.25,
114
80.4M
      },
115
80.4M
      {
116
80.4M
          0.876902929799142f,
117
80.4M
          0.2206518106944235f,
118
80.4M
          -0.10140050393753763f,
119
80.4M
          -0.1014005039375375f,
120
80.4M
          0.2206518106944236f,
121
80.4M
          -0.10140050393753777f,
122
80.4M
          -0.10140050393753772f,
123
80.4M
          -0.10140050393753763f,
124
80.4M
          -0.10140050393753758f,
125
80.4M
          -0.10140050393753769f,
126
80.4M
          -0.1014005039375375f,
127
80.4M
          -0.10140050393753768f,
128
80.4M
          -0.10140050393753768f,
129
80.4M
          -0.10140050393753759f,
130
80.4M
          -0.10140050393753763f,
131
80.4M
          -0.10140050393753741f,
132
80.4M
      },
133
80.4M
      {
134
80.4M
          0.0,
135
80.4M
          0.0,
136
80.4M
          0.40670075830260755f,
137
80.4M
          0.44444816619734445f,
138
80.4M
          0.0,
139
80.4M
          0.0,
140
80.4M
          0.19574399372042936f,
141
80.4M
          0.2929100136981264f,
142
80.4M
          -0.40670075830260716f,
143
80.4M
          -0.19574399372042872f,
144
80.4M
          0.0,
145
80.4M
          0.11379074460448091f,
146
80.4M
          -0.44444816619734384f,
147
80.4M
          -0.29291001369812636f,
148
80.4M
          -0.1137907446044814f,
149
80.4M
          0.0,
150
80.4M
      },
151
80.4M
      {
152
80.4M
          0.0,
153
80.4M
          0.0,
154
80.4M
          -0.21255748058288748f,
155
80.4M
          0.3085497062849767f,
156
80.4M
          0.0,
157
80.4M
          0.4706702258572536f,
158
80.4M
          -0.1621205195722993f,
159
80.4M
          0.0,
160
80.4M
          -0.21255748058287047f,
161
80.4M
          -0.16212051957228327f,
162
80.4M
          -0.47067022585725277f,
163
80.4M
          -0.1464291867126764f,
164
80.4M
          0.3085497062849487f,
165
80.4M
          0.0,
166
80.4M
          -0.14642918671266536f,
167
80.4M
          0.4251149611657548f,
168
80.4M
      },
169
80.4M
      {
170
80.4M
          0.0,
171
80.4M
          -0.7071067811865474f,
172
80.4M
          0.0,
173
80.4M
          0.0,
174
80.4M
          0.7071067811865476f,
175
80.4M
          0.0,
176
80.4M
          0.0,
177
80.4M
          0.0,
178
80.4M
          0.0,
179
80.4M
          0.0,
180
80.4M
          0.0,
181
80.4M
          0.0,
182
80.4M
          0.0,
183
80.4M
          0.0,
184
80.4M
          0.0,
185
80.4M
          0.0,
186
80.4M
      },
187
80.4M
      {
188
80.4M
          -0.4105377591765233f,
189
80.4M
          0.6235485373547691f,
190
80.4M
          -0.06435071657946274f,
191
80.4M
          -0.06435071657946266f,
192
80.4M
          0.6235485373547694f,
193
80.4M
          -0.06435071657946284f,
194
80.4M
          -0.0643507165794628f,
195
80.4M
          -0.06435071657946274f,
196
80.4M
          -0.06435071657946272f,
197
80.4M
          -0.06435071657946279f,
198
80.4M
          -0.06435071657946266f,
199
80.4M
          -0.06435071657946277f,
200
80.4M
          -0.06435071657946277f,
201
80.4M
          -0.06435071657946273f,
202
80.4M
          -0.06435071657946274f,
203
80.4M
          -0.0643507165794626f,
204
80.4M
      },
205
80.4M
      {
206
80.4M
          0.0,
207
80.4M
          0.0,
208
80.4M
          -0.4517556589999482f,
209
80.4M
          0.15854503551840063f,
210
80.4M
          0.0,
211
80.4M
          -0.04038515160822202f,
212
80.4M
          0.0074182263792423875f,
213
80.4M
          0.39351034269210167f,
214
80.4M
          -0.45175565899994635f,
215
80.4M
          0.007418226379244351f,
216
80.4M
          0.1107416575309343f,
217
80.4M
          0.08298163094882051f,
218
80.4M
          0.15854503551839705f,
219
80.4M
          0.3935103426921022f,
220
80.4M
          0.0829816309488214f,
221
80.4M
          -0.45175565899994796f,
222
80.4M
      },
223
80.4M
      {
224
80.4M
          0.0,
225
80.4M
          0.0,
226
80.4M
          -0.304684750724869f,
227
80.4M
          0.5112616136591823f,
228
80.4M
          0.0,
229
80.4M
          0.0,
230
80.4M
          -0.290480129728998f,
231
80.4M
          -0.06578701549142804f,
232
80.4M
          0.304684750724884f,
233
80.4M
          0.2904801297290076f,
234
80.4M
          0.0,
235
80.4M
          -0.23889773523344604f,
236
80.4M
          -0.5112616136592012f,
237
80.4M
          0.06578701549142545f,
238
80.4M
          0.23889773523345467f,
239
80.4M
          0.0,
240
80.4M
      },
241
80.4M
      {
242
80.4M
          0.0,
243
80.4M
          0.0,
244
80.4M
          0.3017929516615495f,
245
80.4M
          0.25792362796341184f,
246
80.4M
          0.0,
247
80.4M
          0.16272340142866204f,
248
80.4M
          0.09520022653475037f,
249
80.4M
          0.0,
250
80.4M
          0.3017929516615503f,
251
80.4M
          0.09520022653475055f,
252
80.4M
          -0.16272340142866173f,
253
80.4M
          -0.35312385449816297f,
254
80.4M
          0.25792362796341295f,
255
80.4M
          0.0,
256
80.4M
          -0.3531238544981624f,
257
80.4M
          -0.6035859033230976f,
258
80.4M
      },
259
80.4M
      {
260
80.4M
          0.0,
261
80.4M
          0.0,
262
80.4M
          0.40824829046386274f,
263
80.4M
          0.0,
264
80.4M
          0.0,
265
80.4M
          0.0,
266
80.4M
          0.0,
267
80.4M
          -0.4082482904638628f,
268
80.4M
          -0.4082482904638635f,
269
80.4M
          0.0,
270
80.4M
          0.0,
271
80.4M
          -0.40824829046386296f,
272
80.4M
          0.0,
273
80.4M
          0.4082482904638634f,
274
80.4M
          0.408248290463863f,
275
80.4M
          0.0,
276
80.4M
      },
277
80.4M
      {
278
80.4M
          0.0,
279
80.4M
          0.0,
280
80.4M
          0.1747866975480809f,
281
80.4M
          0.0812611176717539f,
282
80.4M
          0.0,
283
80.4M
          0.0,
284
80.4M
          -0.3675398009862027f,
285
80.4M
          -0.307882213957909f,
286
80.4M
          -0.17478669754808135f,
287
80.4M
          0.3675398009862011f,
288
80.4M
          0.0,
289
80.4M
          0.4826689115059883f,
290
80.4M
          -0.08126111767175039f,
291
80.4M
          0.30788221395790305f,
292
80.4M
          -0.48266891150598584f,
293
80.4M
          0.0,
294
80.4M
      },
295
80.4M
      {
296
80.4M
          0.0,
297
80.4M
          0.0,
298
80.4M
          -0.21105601049335784f,
299
80.4M
          0.18567180916109802f,
300
80.4M
          0.0,
301
80.4M
          0.0,
302
80.4M
          0.49215859013738733f,
303
80.4M
          -0.38525013709251915f,
304
80.4M
          0.21105601049335806f,
305
80.4M
          -0.49215859013738905f,
306
80.4M
          0.0,
307
80.4M
          0.17419412659916217f,
308
80.4M
          -0.18567180916109904f,
309
80.4M
          0.3852501370925211f,
310
80.4M
          -0.1741941265991621f,
311
80.4M
          0.0,
312
80.4M
      },
313
80.4M
      {
314
80.4M
          0.0,
315
80.4M
          0.0,
316
80.4M
          -0.14266084808807264f,
317
80.4M
          -0.3416446842253372f,
318
80.4M
          0.0,
319
80.4M
          0.7367497537172237f,
320
80.4M
          0.24627107722075148f,
321
80.4M
          -0.08574019035519306f,
322
80.4M
          -0.14266084808807344f,
323
80.4M
          0.24627107722075137f,
324
80.4M
          0.14883399227113567f,
325
80.4M
          -0.04768680350229251f,
326
80.4M
          -0.3416446842253373f,
327
80.4M
          -0.08574019035519267f,
328
80.4M
          -0.047686803502292804f,
329
80.4M
          -0.14266084808807242f,
330
80.4M
      },
331
80.4M
      {
332
80.4M
          0.0,
333
80.4M
          0.0,
334
80.4M
          -0.13813540350758585f,
335
80.4M
          0.3302282550303788f,
336
80.4M
          0.0,
337
80.4M
          0.08755115000587084f,
338
80.4M
          -0.07946706605909573f,
339
80.4M
          -0.4613374887461511f,
340
80.4M
          -0.13813540350758294f,
341
80.4M
          -0.07946706605910261f,
342
80.4M
          0.49724647109535086f,
343
80.4M
          0.12538059448563663f,
344
80.4M
          0.3302282550303805f,
345
80.4M
          -0.4613374887461554f,
346
80.4M
          0.12538059448564315f,
347
80.4M
          -0.13813540350758452f,
348
80.4M
      },
349
80.4M
      {
350
80.4M
          0.0,
351
80.4M
          0.0,
352
80.4M
          -0.17437602599651067f,
353
80.4M
          0.0702790691196284f,
354
80.4M
          0.0,
355
80.4M
          -0.2921026642334881f,
356
80.4M
          0.3623817333531167f,
357
80.4M
          0.0,
358
80.4M
          -0.1743760259965108f,
359
80.4M
          0.36238173335311646f,
360
80.4M
          0.29210266423348785f,
361
80.4M
          -0.4326608024727445f,
362
80.4M
          0.07027906911962818f,
363
80.4M
          0.0,
364
80.4M
          -0.4326608024727457f,
365
80.4M
          0.34875205199302267f,
366
80.4M
      },
367
80.4M
      {
368
80.4M
          0.0,
369
80.4M
          0.0,
370
80.4M
          0.11354987314994337f,
371
80.4M
          -0.07417504595810355f,
372
80.4M
          0.0,
373
80.4M
          0.19402893032594343f,
374
80.4M
          -0.435190496523228f,
375
80.4M
          0.21918684838857466f,
376
80.4M
          0.11354987314994257f,
377
80.4M
          -0.4351904965232251f,
378
80.4M
          0.5550443808910661f,
379
80.4M
          -0.25468277124066463f,
380
80.4M
          -0.07417504595810233f,
381
80.4M
          0.2191868483885728f,
382
80.4M
          -0.25468277124066413f,
383
80.4M
          0.1135498731499429f,
384
80.4M
      },
385
80.4M
  };
386
387
80.4M
  const HWY_CAPPED(float, 16) d;
388
241M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
160M
    auto pixel = Zero(d);
390
2.73G
    for (size_t j = 0; j < 16; j++) {
391
2.57G
      auto cf = Set(d, coeffs[j]);
392
2.57G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.57G
      pixel = MulAdd(cf, basis, pixel);
394
2.57G
    }
395
160M
    Store(pixel, d, pixels + i);
396
160M
  }
397
80.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
5.21M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
5.21M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
5.21M
      {
98
5.21M
          0.25,
99
5.21M
          0.25,
100
5.21M
          0.25,
101
5.21M
          0.25,
102
5.21M
          0.25,
103
5.21M
          0.25,
104
5.21M
          0.25,
105
5.21M
          0.25,
106
5.21M
          0.25,
107
5.21M
          0.25,
108
5.21M
          0.25,
109
5.21M
          0.25,
110
5.21M
          0.25,
111
5.21M
          0.25,
112
5.21M
          0.25,
113
5.21M
          0.25,
114
5.21M
      },
115
5.21M
      {
116
5.21M
          0.876902929799142f,
117
5.21M
          0.2206518106944235f,
118
5.21M
          -0.10140050393753763f,
119
5.21M
          -0.1014005039375375f,
120
5.21M
          0.2206518106944236f,
121
5.21M
          -0.10140050393753777f,
122
5.21M
          -0.10140050393753772f,
123
5.21M
          -0.10140050393753763f,
124
5.21M
          -0.10140050393753758f,
125
5.21M
          -0.10140050393753769f,
126
5.21M
          -0.1014005039375375f,
127
5.21M
          -0.10140050393753768f,
128
5.21M
          -0.10140050393753768f,
129
5.21M
          -0.10140050393753759f,
130
5.21M
          -0.10140050393753763f,
131
5.21M
          -0.10140050393753741f,
132
5.21M
      },
133
5.21M
      {
134
5.21M
          0.0,
135
5.21M
          0.0,
136
5.21M
          0.40670075830260755f,
137
5.21M
          0.44444816619734445f,
138
5.21M
          0.0,
139
5.21M
          0.0,
140
5.21M
          0.19574399372042936f,
141
5.21M
          0.2929100136981264f,
142
5.21M
          -0.40670075830260716f,
143
5.21M
          -0.19574399372042872f,
144
5.21M
          0.0,
145
5.21M
          0.11379074460448091f,
146
5.21M
          -0.44444816619734384f,
147
5.21M
          -0.29291001369812636f,
148
5.21M
          -0.1137907446044814f,
149
5.21M
          0.0,
150
5.21M
      },
151
5.21M
      {
152
5.21M
          0.0,
153
5.21M
          0.0,
154
5.21M
          -0.21255748058288748f,
155
5.21M
          0.3085497062849767f,
156
5.21M
          0.0,
157
5.21M
          0.4706702258572536f,
158
5.21M
          -0.1621205195722993f,
159
5.21M
          0.0,
160
5.21M
          -0.21255748058287047f,
161
5.21M
          -0.16212051957228327f,
162
5.21M
          -0.47067022585725277f,
163
5.21M
          -0.1464291867126764f,
164
5.21M
          0.3085497062849487f,
165
5.21M
          0.0,
166
5.21M
          -0.14642918671266536f,
167
5.21M
          0.4251149611657548f,
168
5.21M
      },
169
5.21M
      {
170
5.21M
          0.0,
171
5.21M
          -0.7071067811865474f,
172
5.21M
          0.0,
173
5.21M
          0.0,
174
5.21M
          0.7071067811865476f,
175
5.21M
          0.0,
176
5.21M
          0.0,
177
5.21M
          0.0,
178
5.21M
          0.0,
179
5.21M
          0.0,
180
5.21M
          0.0,
181
5.21M
          0.0,
182
5.21M
          0.0,
183
5.21M
          0.0,
184
5.21M
          0.0,
185
5.21M
          0.0,
186
5.21M
      },
187
5.21M
      {
188
5.21M
          -0.4105377591765233f,
189
5.21M
          0.6235485373547691f,
190
5.21M
          -0.06435071657946274f,
191
5.21M
          -0.06435071657946266f,
192
5.21M
          0.6235485373547694f,
193
5.21M
          -0.06435071657946284f,
194
5.21M
          -0.0643507165794628f,
195
5.21M
          -0.06435071657946274f,
196
5.21M
          -0.06435071657946272f,
197
5.21M
          -0.06435071657946279f,
198
5.21M
          -0.06435071657946266f,
199
5.21M
          -0.06435071657946277f,
200
5.21M
          -0.06435071657946277f,
201
5.21M
          -0.06435071657946273f,
202
5.21M
          -0.06435071657946274f,
203
5.21M
          -0.0643507165794626f,
204
5.21M
      },
205
5.21M
      {
206
5.21M
          0.0,
207
5.21M
          0.0,
208
5.21M
          -0.4517556589999482f,
209
5.21M
          0.15854503551840063f,
210
5.21M
          0.0,
211
5.21M
          -0.04038515160822202f,
212
5.21M
          0.0074182263792423875f,
213
5.21M
          0.39351034269210167f,
214
5.21M
          -0.45175565899994635f,
215
5.21M
          0.007418226379244351f,
216
5.21M
          0.1107416575309343f,
217
5.21M
          0.08298163094882051f,
218
5.21M
          0.15854503551839705f,
219
5.21M
          0.3935103426921022f,
220
5.21M
          0.0829816309488214f,
221
5.21M
          -0.45175565899994796f,
222
5.21M
      },
223
5.21M
      {
224
5.21M
          0.0,
225
5.21M
          0.0,
226
5.21M
          -0.304684750724869f,
227
5.21M
          0.5112616136591823f,
228
5.21M
          0.0,
229
5.21M
          0.0,
230
5.21M
          -0.290480129728998f,
231
5.21M
          -0.06578701549142804f,
232
5.21M
          0.304684750724884f,
233
5.21M
          0.2904801297290076f,
234
5.21M
          0.0,
235
5.21M
          -0.23889773523344604f,
236
5.21M
          -0.5112616136592012f,
237
5.21M
          0.06578701549142545f,
238
5.21M
          0.23889773523345467f,
239
5.21M
          0.0,
240
5.21M
      },
241
5.21M
      {
242
5.21M
          0.0,
243
5.21M
          0.0,
244
5.21M
          0.3017929516615495f,
245
5.21M
          0.25792362796341184f,
246
5.21M
          0.0,
247
5.21M
          0.16272340142866204f,
248
5.21M
          0.09520022653475037f,
249
5.21M
          0.0,
250
5.21M
          0.3017929516615503f,
251
5.21M
          0.09520022653475055f,
252
5.21M
          -0.16272340142866173f,
253
5.21M
          -0.35312385449816297f,
254
5.21M
          0.25792362796341295f,
255
5.21M
          0.0,
256
5.21M
          -0.3531238544981624f,
257
5.21M
          -0.6035859033230976f,
258
5.21M
      },
259
5.21M
      {
260
5.21M
          0.0,
261
5.21M
          0.0,
262
5.21M
          0.40824829046386274f,
263
5.21M
          0.0,
264
5.21M
          0.0,
265
5.21M
          0.0,
266
5.21M
          0.0,
267
5.21M
          -0.4082482904638628f,
268
5.21M
          -0.4082482904638635f,
269
5.21M
          0.0,
270
5.21M
          0.0,
271
5.21M
          -0.40824829046386296f,
272
5.21M
          0.0,
273
5.21M
          0.4082482904638634f,
274
5.21M
          0.408248290463863f,
275
5.21M
          0.0,
276
5.21M
      },
277
5.21M
      {
278
5.21M
          0.0,
279
5.21M
          0.0,
280
5.21M
          0.1747866975480809f,
281
5.21M
          0.0812611176717539f,
282
5.21M
          0.0,
283
5.21M
          0.0,
284
5.21M
          -0.3675398009862027f,
285
5.21M
          -0.307882213957909f,
286
5.21M
          -0.17478669754808135f,
287
5.21M
          0.3675398009862011f,
288
5.21M
          0.0,
289
5.21M
          0.4826689115059883f,
290
5.21M
          -0.08126111767175039f,
291
5.21M
          0.30788221395790305f,
292
5.21M
          -0.48266891150598584f,
293
5.21M
          0.0,
294
5.21M
      },
295
5.21M
      {
296
5.21M
          0.0,
297
5.21M
          0.0,
298
5.21M
          -0.21105601049335784f,
299
5.21M
          0.18567180916109802f,
300
5.21M
          0.0,
301
5.21M
          0.0,
302
5.21M
          0.49215859013738733f,
303
5.21M
          -0.38525013709251915f,
304
5.21M
          0.21105601049335806f,
305
5.21M
          -0.49215859013738905f,
306
5.21M
          0.0,
307
5.21M
          0.17419412659916217f,
308
5.21M
          -0.18567180916109904f,
309
5.21M
          0.3852501370925211f,
310
5.21M
          -0.1741941265991621f,
311
5.21M
          0.0,
312
5.21M
      },
313
5.21M
      {
314
5.21M
          0.0,
315
5.21M
          0.0,
316
5.21M
          -0.14266084808807264f,
317
5.21M
          -0.3416446842253372f,
318
5.21M
          0.0,
319
5.21M
          0.7367497537172237f,
320
5.21M
          0.24627107722075148f,
321
5.21M
          -0.08574019035519306f,
322
5.21M
          -0.14266084808807344f,
323
5.21M
          0.24627107722075137f,
324
5.21M
          0.14883399227113567f,
325
5.21M
          -0.04768680350229251f,
326
5.21M
          -0.3416446842253373f,
327
5.21M
          -0.08574019035519267f,
328
5.21M
          -0.047686803502292804f,
329
5.21M
          -0.14266084808807242f,
330
5.21M
      },
331
5.21M
      {
332
5.21M
          0.0,
333
5.21M
          0.0,
334
5.21M
          -0.13813540350758585f,
335
5.21M
          0.3302282550303788f,
336
5.21M
          0.0,
337
5.21M
          0.08755115000587084f,
338
5.21M
          -0.07946706605909573f,
339
5.21M
          -0.4613374887461511f,
340
5.21M
          -0.13813540350758294f,
341
5.21M
          -0.07946706605910261f,
342
5.21M
          0.49724647109535086f,
343
5.21M
          0.12538059448563663f,
344
5.21M
          0.3302282550303805f,
345
5.21M
          -0.4613374887461554f,
346
5.21M
          0.12538059448564315f,
347
5.21M
          -0.13813540350758452f,
348
5.21M
      },
349
5.21M
      {
350
5.21M
          0.0,
351
5.21M
          0.0,
352
5.21M
          -0.17437602599651067f,
353
5.21M
          0.0702790691196284f,
354
5.21M
          0.0,
355
5.21M
          -0.2921026642334881f,
356
5.21M
          0.3623817333531167f,
357
5.21M
          0.0,
358
5.21M
          -0.1743760259965108f,
359
5.21M
          0.36238173335311646f,
360
5.21M
          0.29210266423348785f,
361
5.21M
          -0.4326608024727445f,
362
5.21M
          0.07027906911962818f,
363
5.21M
          0.0,
364
5.21M
          -0.4326608024727457f,
365
5.21M
          0.34875205199302267f,
366
5.21M
      },
367
5.21M
      {
368
5.21M
          0.0,
369
5.21M
          0.0,
370
5.21M
          0.11354987314994337f,
371
5.21M
          -0.07417504595810355f,
372
5.21M
          0.0,
373
5.21M
          0.19402893032594343f,
374
5.21M
          -0.435190496523228f,
375
5.21M
          0.21918684838857466f,
376
5.21M
          0.11354987314994257f,
377
5.21M
          -0.4351904965232251f,
378
5.21M
          0.5550443808910661f,
379
5.21M
          -0.25468277124066463f,
380
5.21M
          -0.07417504595810233f,
381
5.21M
          0.2191868483885728f,
382
5.21M
          -0.25468277124066413f,
383
5.21M
          0.1135498731499429f,
384
5.21M
      },
385
5.21M
  };
386
387
5.21M
  const HWY_CAPPED(float, 16) d;
388
15.6M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
10.4M
    auto pixel = Zero(d);
390
177M
    for (size_t j = 0; j < 16; j++) {
391
166M
      auto cf = Set(d, coeffs[j]);
392
166M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
166M
      pixel = MulAdd(cf, basis, pixel);
394
166M
    }
395
10.4M
    Store(pixel, d, pixels + i);
396
10.4M
  }
397
5.21M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
75.2M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
75.2M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
75.2M
      {
98
75.2M
          0.25,
99
75.2M
          0.25,
100
75.2M
          0.25,
101
75.2M
          0.25,
102
75.2M
          0.25,
103
75.2M
          0.25,
104
75.2M
          0.25,
105
75.2M
          0.25,
106
75.2M
          0.25,
107
75.2M
          0.25,
108
75.2M
          0.25,
109
75.2M
          0.25,
110
75.2M
          0.25,
111
75.2M
          0.25,
112
75.2M
          0.25,
113
75.2M
          0.25,
114
75.2M
      },
115
75.2M
      {
116
75.2M
          0.876902929799142f,
117
75.2M
          0.2206518106944235f,
118
75.2M
          -0.10140050393753763f,
119
75.2M
          -0.1014005039375375f,
120
75.2M
          0.2206518106944236f,
121
75.2M
          -0.10140050393753777f,
122
75.2M
          -0.10140050393753772f,
123
75.2M
          -0.10140050393753763f,
124
75.2M
          -0.10140050393753758f,
125
75.2M
          -0.10140050393753769f,
126
75.2M
          -0.1014005039375375f,
127
75.2M
          -0.10140050393753768f,
128
75.2M
          -0.10140050393753768f,
129
75.2M
          -0.10140050393753759f,
130
75.2M
          -0.10140050393753763f,
131
75.2M
          -0.10140050393753741f,
132
75.2M
      },
133
75.2M
      {
134
75.2M
          0.0,
135
75.2M
          0.0,
136
75.2M
          0.40670075830260755f,
137
75.2M
          0.44444816619734445f,
138
75.2M
          0.0,
139
75.2M
          0.0,
140
75.2M
          0.19574399372042936f,
141
75.2M
          0.2929100136981264f,
142
75.2M
          -0.40670075830260716f,
143
75.2M
          -0.19574399372042872f,
144
75.2M
          0.0,
145
75.2M
          0.11379074460448091f,
146
75.2M
          -0.44444816619734384f,
147
75.2M
          -0.29291001369812636f,
148
75.2M
          -0.1137907446044814f,
149
75.2M
          0.0,
150
75.2M
      },
151
75.2M
      {
152
75.2M
          0.0,
153
75.2M
          0.0,
154
75.2M
          -0.21255748058288748f,
155
75.2M
          0.3085497062849767f,
156
75.2M
          0.0,
157
75.2M
          0.4706702258572536f,
158
75.2M
          -0.1621205195722993f,
159
75.2M
          0.0,
160
75.2M
          -0.21255748058287047f,
161
75.2M
          -0.16212051957228327f,
162
75.2M
          -0.47067022585725277f,
163
75.2M
          -0.1464291867126764f,
164
75.2M
          0.3085497062849487f,
165
75.2M
          0.0,
166
75.2M
          -0.14642918671266536f,
167
75.2M
          0.4251149611657548f,
168
75.2M
      },
169
75.2M
      {
170
75.2M
          0.0,
171
75.2M
          -0.7071067811865474f,
172
75.2M
          0.0,
173
75.2M
          0.0,
174
75.2M
          0.7071067811865476f,
175
75.2M
          0.0,
176
75.2M
          0.0,
177
75.2M
          0.0,
178
75.2M
          0.0,
179
75.2M
          0.0,
180
75.2M
          0.0,
181
75.2M
          0.0,
182
75.2M
          0.0,
183
75.2M
          0.0,
184
75.2M
          0.0,
185
75.2M
          0.0,
186
75.2M
      },
187
75.2M
      {
188
75.2M
          -0.4105377591765233f,
189
75.2M
          0.6235485373547691f,
190
75.2M
          -0.06435071657946274f,
191
75.2M
          -0.06435071657946266f,
192
75.2M
          0.6235485373547694f,
193
75.2M
          -0.06435071657946284f,
194
75.2M
          -0.0643507165794628f,
195
75.2M
          -0.06435071657946274f,
196
75.2M
          -0.06435071657946272f,
197
75.2M
          -0.06435071657946279f,
198
75.2M
          -0.06435071657946266f,
199
75.2M
          -0.06435071657946277f,
200
75.2M
          -0.06435071657946277f,
201
75.2M
          -0.06435071657946273f,
202
75.2M
          -0.06435071657946274f,
203
75.2M
          -0.0643507165794626f,
204
75.2M
      },
205
75.2M
      {
206
75.2M
          0.0,
207
75.2M
          0.0,
208
75.2M
          -0.4517556589999482f,
209
75.2M
          0.15854503551840063f,
210
75.2M
          0.0,
211
75.2M
          -0.04038515160822202f,
212
75.2M
          0.0074182263792423875f,
213
75.2M
          0.39351034269210167f,
214
75.2M
          -0.45175565899994635f,
215
75.2M
          0.007418226379244351f,
216
75.2M
          0.1107416575309343f,
217
75.2M
          0.08298163094882051f,
218
75.2M
          0.15854503551839705f,
219
75.2M
          0.3935103426921022f,
220
75.2M
          0.0829816309488214f,
221
75.2M
          -0.45175565899994796f,
222
75.2M
      },
223
75.2M
      {
224
75.2M
          0.0,
225
75.2M
          0.0,
226
75.2M
          -0.304684750724869f,
227
75.2M
          0.5112616136591823f,
228
75.2M
          0.0,
229
75.2M
          0.0,
230
75.2M
          -0.290480129728998f,
231
75.2M
          -0.06578701549142804f,
232
75.2M
          0.304684750724884f,
233
75.2M
          0.2904801297290076f,
234
75.2M
          0.0,
235
75.2M
          -0.23889773523344604f,
236
75.2M
          -0.5112616136592012f,
237
75.2M
          0.06578701549142545f,
238
75.2M
          0.23889773523345467f,
239
75.2M
          0.0,
240
75.2M
      },
241
75.2M
      {
242
75.2M
          0.0,
243
75.2M
          0.0,
244
75.2M
          0.3017929516615495f,
245
75.2M
          0.25792362796341184f,
246
75.2M
          0.0,
247
75.2M
          0.16272340142866204f,
248
75.2M
          0.09520022653475037f,
249
75.2M
          0.0,
250
75.2M
          0.3017929516615503f,
251
75.2M
          0.09520022653475055f,
252
75.2M
          -0.16272340142866173f,
253
75.2M
          -0.35312385449816297f,
254
75.2M
          0.25792362796341295f,
255
75.2M
          0.0,
256
75.2M
          -0.3531238544981624f,
257
75.2M
          -0.6035859033230976f,
258
75.2M
      },
259
75.2M
      {
260
75.2M
          0.0,
261
75.2M
          0.0,
262
75.2M
          0.40824829046386274f,
263
75.2M
          0.0,
264
75.2M
          0.0,
265
75.2M
          0.0,
266
75.2M
          0.0,
267
75.2M
          -0.4082482904638628f,
268
75.2M
          -0.4082482904638635f,
269
75.2M
          0.0,
270
75.2M
          0.0,
271
75.2M
          -0.40824829046386296f,
272
75.2M
          0.0,
273
75.2M
          0.4082482904638634f,
274
75.2M
          0.408248290463863f,
275
75.2M
          0.0,
276
75.2M
      },
277
75.2M
      {
278
75.2M
          0.0,
279
75.2M
          0.0,
280
75.2M
          0.1747866975480809f,
281
75.2M
          0.0812611176717539f,
282
75.2M
          0.0,
283
75.2M
          0.0,
284
75.2M
          -0.3675398009862027f,
285
75.2M
          -0.307882213957909f,
286
75.2M
          -0.17478669754808135f,
287
75.2M
          0.3675398009862011f,
288
75.2M
          0.0,
289
75.2M
          0.4826689115059883f,
290
75.2M
          -0.08126111767175039f,
291
75.2M
          0.30788221395790305f,
292
75.2M
          -0.48266891150598584f,
293
75.2M
          0.0,
294
75.2M
      },
295
75.2M
      {
296
75.2M
          0.0,
297
75.2M
          0.0,
298
75.2M
          -0.21105601049335784f,
299
75.2M
          0.18567180916109802f,
300
75.2M
          0.0,
301
75.2M
          0.0,
302
75.2M
          0.49215859013738733f,
303
75.2M
          -0.38525013709251915f,
304
75.2M
          0.21105601049335806f,
305
75.2M
          -0.49215859013738905f,
306
75.2M
          0.0,
307
75.2M
          0.17419412659916217f,
308
75.2M
          -0.18567180916109904f,
309
75.2M
          0.3852501370925211f,
310
75.2M
          -0.1741941265991621f,
311
75.2M
          0.0,
312
75.2M
      },
313
75.2M
      {
314
75.2M
          0.0,
315
75.2M
          0.0,
316
75.2M
          -0.14266084808807264f,
317
75.2M
          -0.3416446842253372f,
318
75.2M
          0.0,
319
75.2M
          0.7367497537172237f,
320
75.2M
          0.24627107722075148f,
321
75.2M
          -0.08574019035519306f,
322
75.2M
          -0.14266084808807344f,
323
75.2M
          0.24627107722075137f,
324
75.2M
          0.14883399227113567f,
325
75.2M
          -0.04768680350229251f,
326
75.2M
          -0.3416446842253373f,
327
75.2M
          -0.08574019035519267f,
328
75.2M
          -0.047686803502292804f,
329
75.2M
          -0.14266084808807242f,
330
75.2M
      },
331
75.2M
      {
332
75.2M
          0.0,
333
75.2M
          0.0,
334
75.2M
          -0.13813540350758585f,
335
75.2M
          0.3302282550303788f,
336
75.2M
          0.0,
337
75.2M
          0.08755115000587084f,
338
75.2M
          -0.07946706605909573f,
339
75.2M
          -0.4613374887461511f,
340
75.2M
          -0.13813540350758294f,
341
75.2M
          -0.07946706605910261f,
342
75.2M
          0.49724647109535086f,
343
75.2M
          0.12538059448563663f,
344
75.2M
          0.3302282550303805f,
345
75.2M
          -0.4613374887461554f,
346
75.2M
          0.12538059448564315f,
347
75.2M
          -0.13813540350758452f,
348
75.2M
      },
349
75.2M
      {
350
75.2M
          0.0,
351
75.2M
          0.0,
352
75.2M
          -0.17437602599651067f,
353
75.2M
          0.0702790691196284f,
354
75.2M
          0.0,
355
75.2M
          -0.2921026642334881f,
356
75.2M
          0.3623817333531167f,
357
75.2M
          0.0,
358
75.2M
          -0.1743760259965108f,
359
75.2M
          0.36238173335311646f,
360
75.2M
          0.29210266423348785f,
361
75.2M
          -0.4326608024727445f,
362
75.2M
          0.07027906911962818f,
363
75.2M
          0.0,
364
75.2M
          -0.4326608024727457f,
365
75.2M
          0.34875205199302267f,
366
75.2M
      },
367
75.2M
      {
368
75.2M
          0.0,
369
75.2M
          0.0,
370
75.2M
          0.11354987314994337f,
371
75.2M
          -0.07417504595810355f,
372
75.2M
          0.0,
373
75.2M
          0.19402893032594343f,
374
75.2M
          -0.435190496523228f,
375
75.2M
          0.21918684838857466f,
376
75.2M
          0.11354987314994257f,
377
75.2M
          -0.4351904965232251f,
378
75.2M
          0.5550443808910661f,
379
75.2M
          -0.25468277124066463f,
380
75.2M
          -0.07417504595810233f,
381
75.2M
          0.2191868483885728f,
382
75.2M
          -0.25468277124066413f,
383
75.2M
          0.1135498731499429f,
384
75.2M
      },
385
75.2M
  };
386
387
75.2M
  const HWY_CAPPED(float, 16) d;
388
225M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
150M
    auto pixel = Zero(d);
390
2.55G
    for (size_t j = 0; j < 16; j++) {
391
2.40G
      auto cf = Set(d, coeffs[j]);
392
2.40G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
2.40G
      pixel = MulAdd(cf, basis, pixel);
394
2.40G
    }
395
150M
    Store(pixel, d, pixels + i);
396
150M
  }
397
75.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
80.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
80.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
80.4M
  size_t afv_x = afv_kind & 1;
404
80.4M
  size_t afv_y = afv_kind / 2;
405
80.4M
  float dcs[3] = {};
406
80.4M
  float block00 = coefficients[0];
407
80.4M
  float block01 = coefficients[1];
408
80.4M
  float block10 = coefficients[8];
409
80.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
80.4M
  dcs[1] = (block00 + block10 - block01);
411
80.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
80.4M
  HWY_ALIGN float coeff[4 * 4];
414
80.4M
  coeff[0] = dcs[0];
415
402M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.60G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.28G
      if (ix == 0 && iy == 0) continue;
418
1.20G
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
1.20G
    }
420
321M
  }
421
80.4M
  HWY_ALIGN float block[4 * 8];
422
80.4M
  AFVIDCT4x4(coeff, block);
423
402M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.60G
    for (size_t ix = 0; ix < 4; ix++) {
425
1.28G
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
1.28G
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
1.28G
    }
428
321M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
80.4M
  block[0] = dcs[1];
431
402M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.60G
    for (size_t ix = 0; ix < 4; ix++) {
433
1.28G
      if (ix == 0 && iy == 0) continue;
434
1.20G
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
1.20G
    }
436
321M
  }
437
80.4M
  ComputeScaledIDCT<4, 4>()(
438
80.4M
      block,
439
80.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
80.4M
            pixels_stride),
441
80.4M
      scratch_space);
442
  // IDCT4x8.
443
80.4M
  block[0] = dcs[2];
444
402M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.89G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.57G
      if (ix == 0 && iy == 0) continue;
447
2.49G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
2.49G
    }
449
321M
  }
450
80.4M
  ComputeScaledIDCT<4, 8>()(
451
80.4M
      block,
452
80.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
80.4M
      scratch_space);
454
80.4M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
999k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
999k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
999k
  size_t afv_x = afv_kind & 1;
404
999k
  size_t afv_y = afv_kind / 2;
405
999k
  float dcs[3] = {};
406
999k
  float block00 = coefficients[0];
407
999k
  float block01 = coefficients[1];
408
999k
  float block10 = coefficients[8];
409
999k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
999k
  dcs[1] = (block00 + block10 - block01);
411
999k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
999k
  HWY_ALIGN float coeff[4 * 4];
414
999k
  coeff[0] = dcs[0];
415
4.99M
  for (size_t iy = 0; iy < 4; iy++) {
416
19.9M
    for (size_t ix = 0; ix < 4; ix++) {
417
15.9M
      if (ix == 0 && iy == 0) continue;
418
14.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
14.9M
    }
420
3.99M
  }
421
999k
  HWY_ALIGN float block[4 * 8];
422
999k
  AFVIDCT4x4(coeff, block);
423
4.99M
  for (size_t iy = 0; iy < 4; iy++) {
424
19.9M
    for (size_t ix = 0; ix < 4; ix++) {
425
15.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
15.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
15.9M
    }
428
3.99M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
999k
  block[0] = dcs[1];
431
4.99M
  for (size_t iy = 0; iy < 4; iy++) {
432
19.9M
    for (size_t ix = 0; ix < 4; ix++) {
433
15.9M
      if (ix == 0 && iy == 0) continue;
434
14.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
14.9M
    }
436
3.99M
  }
437
999k
  ComputeScaledIDCT<4, 4>()(
438
999k
      block,
439
999k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
999k
            pixels_stride),
441
999k
      scratch_space);
442
  // IDCT4x8.
443
999k
  block[0] = dcs[2];
444
4.99M
  for (size_t iy = 0; iy < 4; iy++) {
445
35.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
31.9M
      if (ix == 0 && iy == 0) continue;
447
30.9M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
30.9M
    }
449
3.99M
  }
450
999k
  ComputeScaledIDCT<4, 8>()(
451
999k
      block,
452
999k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
999k
      scratch_space);
454
999k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
2.06M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
2.06M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
2.06M
  size_t afv_x = afv_kind & 1;
404
2.06M
  size_t afv_y = afv_kind / 2;
405
2.06M
  float dcs[3] = {};
406
2.06M
  float block00 = coefficients[0];
407
2.06M
  float block01 = coefficients[1];
408
2.06M
  float block10 = coefficients[8];
409
2.06M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
2.06M
  dcs[1] = (block00 + block10 - block01);
411
2.06M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
2.06M
  HWY_ALIGN float coeff[4 * 4];
414
2.06M
  coeff[0] = dcs[0];
415
10.3M
  for (size_t iy = 0; iy < 4; iy++) {
416
41.3M
    for (size_t ix = 0; ix < 4; ix++) {
417
33.1M
      if (ix == 0 && iy == 0) continue;
418
31.0M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
31.0M
    }
420
8.27M
  }
421
2.06M
  HWY_ALIGN float block[4 * 8];
422
2.06M
  AFVIDCT4x4(coeff, block);
423
10.3M
  for (size_t iy = 0; iy < 4; iy++) {
424
41.3M
    for (size_t ix = 0; ix < 4; ix++) {
425
33.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
33.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
33.1M
    }
428
8.27M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
2.06M
  block[0] = dcs[1];
431
10.3M
  for (size_t iy = 0; iy < 4; iy++) {
432
41.3M
    for (size_t ix = 0; ix < 4; ix++) {
433
33.1M
      if (ix == 0 && iy == 0) continue;
434
31.0M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
31.0M
    }
436
8.27M
  }
437
2.06M
  ComputeScaledIDCT<4, 4>()(
438
2.06M
      block,
439
2.06M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
2.06M
            pixels_stride),
441
2.06M
      scratch_space);
442
  // IDCT4x8.
443
2.06M
  block[0] = dcs[2];
444
10.3M
  for (size_t iy = 0; iy < 4; iy++) {
445
74.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
66.2M
      if (ix == 0 && iy == 0) continue;
447
64.1M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
64.1M
    }
449
8.27M
  }
450
2.06M
  ComputeScaledIDCT<4, 8>()(
451
2.06M
      block,
452
2.06M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
2.06M
      scratch_space);
454
2.06M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
858k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
858k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
858k
  size_t afv_x = afv_kind & 1;
404
858k
  size_t afv_y = afv_kind / 2;
405
858k
  float dcs[3] = {};
406
858k
  float block00 = coefficients[0];
407
858k
  float block01 = coefficients[1];
408
858k
  float block10 = coefficients[8];
409
858k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
858k
  dcs[1] = (block00 + block10 - block01);
411
858k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
858k
  HWY_ALIGN float coeff[4 * 4];
414
858k
  coeff[0] = dcs[0];
415
4.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
17.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
13.7M
      if (ix == 0 && iy == 0) continue;
418
12.8M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
12.8M
    }
420
3.43M
  }
421
858k
  HWY_ALIGN float block[4 * 8];
422
858k
  AFVIDCT4x4(coeff, block);
423
4.29M
  for (size_t iy = 0; iy < 4; iy++) {
424
17.1M
    for (size_t ix = 0; ix < 4; ix++) {
425
13.7M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
13.7M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
13.7M
    }
428
3.43M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
858k
  block[0] = dcs[1];
431
4.29M
  for (size_t iy = 0; iy < 4; iy++) {
432
17.1M
    for (size_t ix = 0; ix < 4; ix++) {
433
13.7M
      if (ix == 0 && iy == 0) continue;
434
12.8M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
12.8M
    }
436
3.43M
  }
437
858k
  ComputeScaledIDCT<4, 4>()(
438
858k
      block,
439
858k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
858k
            pixels_stride),
441
858k
      scratch_space);
442
  // IDCT4x8.
443
858k
  block[0] = dcs[2];
444
4.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
30.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
27.4M
      if (ix == 0 && iy == 0) continue;
447
26.6M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
26.6M
    }
449
3.43M
  }
450
858k
  ComputeScaledIDCT<4, 8>()(
451
858k
      block,
452
858k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
858k
      scratch_space);
454
858k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.28M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.28M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.28M
  size_t afv_x = afv_kind & 1;
404
1.28M
  size_t afv_y = afv_kind / 2;
405
1.28M
  float dcs[3] = {};
406
1.28M
  float block00 = coefficients[0];
407
1.28M
  float block01 = coefficients[1];
408
1.28M
  float block10 = coefficients[8];
409
1.28M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.28M
  dcs[1] = (block00 + block10 - block01);
411
1.28M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.28M
  HWY_ALIGN float coeff[4 * 4];
414
1.28M
  coeff[0] = dcs[0];
415
6.41M
  for (size_t iy = 0; iy < 4; iy++) {
416
25.6M
    for (size_t ix = 0; ix < 4; ix++) {
417
20.5M
      if (ix == 0 && iy == 0) continue;
418
19.2M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
19.2M
    }
420
5.13M
  }
421
1.28M
  HWY_ALIGN float block[4 * 8];
422
1.28M
  AFVIDCT4x4(coeff, block);
423
6.41M
  for (size_t iy = 0; iy < 4; iy++) {
424
25.6M
    for (size_t ix = 0; ix < 4; ix++) {
425
20.5M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
20.5M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
20.5M
    }
428
5.13M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.28M
  block[0] = dcs[1];
431
6.41M
  for (size_t iy = 0; iy < 4; iy++) {
432
25.6M
    for (size_t ix = 0; ix < 4; ix++) {
433
20.5M
      if (ix == 0 && iy == 0) continue;
434
19.2M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
19.2M
    }
436
5.13M
  }
437
1.28M
  ComputeScaledIDCT<4, 4>()(
438
1.28M
      block,
439
1.28M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.28M
            pixels_stride),
441
1.28M
      scratch_space);
442
  // IDCT4x8.
443
1.28M
  block[0] = dcs[2];
444
6.41M
  for (size_t iy = 0; iy < 4; iy++) {
445
46.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
41.0M
      if (ix == 0 && iy == 0) continue;
447
39.7M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
39.7M
    }
449
5.13M
  }
450
1.28M
  ComputeScaledIDCT<4, 8>()(
451
1.28M
      block,
452
1.28M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.28M
      scratch_space);
454
1.28M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
18.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
18.8M
  size_t afv_x = afv_kind & 1;
404
18.8M
  size_t afv_y = afv_kind / 2;
405
18.8M
  float dcs[3] = {};
406
18.8M
  float block00 = coefficients[0];
407
18.8M
  float block01 = coefficients[1];
408
18.8M
  float block10 = coefficients[8];
409
18.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
18.8M
  dcs[1] = (block00 + block10 - block01);
411
18.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
18.8M
  HWY_ALIGN float coeff[4 * 4];
414
18.8M
  coeff[0] = dcs[0];
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      if (ix == 0 && iy == 0) continue;
418
282M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
282M
    }
420
75.2M
  }
421
18.8M
  HWY_ALIGN float block[4 * 8];
422
18.8M
  AFVIDCT4x4(coeff, block);
423
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
376M
    for (size_t ix = 0; ix < 4; ix++) {
425
301M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
301M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
301M
    }
428
75.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
18.8M
  block[0] = dcs[1];
431
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
376M
    for (size_t ix = 0; ix < 4; ix++) {
433
301M
      if (ix == 0 && iy == 0) continue;
434
282M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
282M
    }
436
75.2M
  }
437
18.8M
  ComputeScaledIDCT<4, 4>()(
438
18.8M
      block,
439
18.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
18.8M
            pixels_stride),
441
18.8M
      scratch_space);
442
  // IDCT4x8.
443
18.8M
  block[0] = dcs[2];
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      if (ix == 0 && iy == 0) continue;
447
583M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
583M
    }
449
75.2M
  }
450
18.8M
  ComputeScaledIDCT<4, 8>()(
451
18.8M
      block,
452
18.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
18.8M
      scratch_space);
454
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
18.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
18.8M
  size_t afv_x = afv_kind & 1;
404
18.8M
  size_t afv_y = afv_kind / 2;
405
18.8M
  float dcs[3] = {};
406
18.8M
  float block00 = coefficients[0];
407
18.8M
  float block01 = coefficients[1];
408
18.8M
  float block10 = coefficients[8];
409
18.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
18.8M
  dcs[1] = (block00 + block10 - block01);
411
18.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
18.8M
  HWY_ALIGN float coeff[4 * 4];
414
18.8M
  coeff[0] = dcs[0];
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      if (ix == 0 && iy == 0) continue;
418
282M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
282M
    }
420
75.2M
  }
421
18.8M
  HWY_ALIGN float block[4 * 8];
422
18.8M
  AFVIDCT4x4(coeff, block);
423
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
376M
    for (size_t ix = 0; ix < 4; ix++) {
425
301M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
301M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
301M
    }
428
75.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
18.8M
  block[0] = dcs[1];
431
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
376M
    for (size_t ix = 0; ix < 4; ix++) {
433
301M
      if (ix == 0 && iy == 0) continue;
434
282M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
282M
    }
436
75.2M
  }
437
18.8M
  ComputeScaledIDCT<4, 4>()(
438
18.8M
      block,
439
18.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
18.8M
            pixels_stride),
441
18.8M
      scratch_space);
442
  // IDCT4x8.
443
18.8M
  block[0] = dcs[2];
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      if (ix == 0 && iy == 0) continue;
447
583M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
583M
    }
449
75.2M
  }
450
18.8M
  ComputeScaledIDCT<4, 8>()(
451
18.8M
      block,
452
18.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
18.8M
      scratch_space);
454
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
18.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
18.8M
  size_t afv_x = afv_kind & 1;
404
18.8M
  size_t afv_y = afv_kind / 2;
405
18.8M
  float dcs[3] = {};
406
18.8M
  float block00 = coefficients[0];
407
18.8M
  float block01 = coefficients[1];
408
18.8M
  float block10 = coefficients[8];
409
18.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
18.8M
  dcs[1] = (block00 + block10 - block01);
411
18.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
18.8M
  HWY_ALIGN float coeff[4 * 4];
414
18.8M
  coeff[0] = dcs[0];
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      if (ix == 0 && iy == 0) continue;
418
282M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
282M
    }
420
75.2M
  }
421
18.8M
  HWY_ALIGN float block[4 * 8];
422
18.8M
  AFVIDCT4x4(coeff, block);
423
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
376M
    for (size_t ix = 0; ix < 4; ix++) {
425
301M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
301M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
301M
    }
428
75.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
18.8M
  block[0] = dcs[1];
431
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
376M
    for (size_t ix = 0; ix < 4; ix++) {
433
301M
      if (ix == 0 && iy == 0) continue;
434
282M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
282M
    }
436
75.2M
  }
437
18.8M
  ComputeScaledIDCT<4, 4>()(
438
18.8M
      block,
439
18.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
18.8M
            pixels_stride),
441
18.8M
      scratch_space);
442
  // IDCT4x8.
443
18.8M
  block[0] = dcs[2];
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      if (ix == 0 && iy == 0) continue;
447
583M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
583M
    }
449
75.2M
  }
450
18.8M
  ComputeScaledIDCT<4, 8>()(
451
18.8M
      block,
452
18.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
18.8M
      scratch_space);
454
18.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
18.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
18.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
18.8M
  size_t afv_x = afv_kind & 1;
404
18.8M
  size_t afv_y = afv_kind / 2;
405
18.8M
  float dcs[3] = {};
406
18.8M
  float block00 = coefficients[0];
407
18.8M
  float block01 = coefficients[1];
408
18.8M
  float block10 = coefficients[8];
409
18.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
18.8M
  dcs[1] = (block00 + block10 - block01);
411
18.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
18.8M
  HWY_ALIGN float coeff[4 * 4];
414
18.8M
  coeff[0] = dcs[0];
415
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
376M
    for (size_t ix = 0; ix < 4; ix++) {
417
301M
      if (ix == 0 && iy == 0) continue;
418
282M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
282M
    }
420
75.2M
  }
421
18.8M
  HWY_ALIGN float block[4 * 8];
422
18.8M
  AFVIDCT4x4(coeff, block);
423
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
376M
    for (size_t ix = 0; ix < 4; ix++) {
425
301M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
301M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
301M
    }
428
75.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
18.8M
  block[0] = dcs[1];
431
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
376M
    for (size_t ix = 0; ix < 4; ix++) {
433
301M
      if (ix == 0 && iy == 0) continue;
434
282M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
282M
    }
436
75.2M
  }
437
18.8M
  ComputeScaledIDCT<4, 4>()(
438
18.8M
      block,
439
18.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
18.8M
            pixels_stride),
441
18.8M
      scratch_space);
442
  // IDCT4x8.
443
18.8M
  block[0] = dcs[2];
444
94.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
677M
    for (size_t ix = 0; ix < 8; ix++) {
446
602M
      if (ix == 0 && iy == 0) continue;
447
583M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
583M
    }
449
75.2M
  }
450
18.8M
  ComputeScaledIDCT<4, 8>()(
451
18.8M
      block,
452
18.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
18.8M
      scratch_space);
454
18.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
267M
                                        float* scratch_space) {
461
267M
  using Type = AcStrategyType;
462
267M
  switch (strategy) {
463
22.3M
    case Type::IDENTITY: {
464
22.3M
      float dcs[4] = {};
465
22.3M
      float block00 = coefficients[0];
466
22.3M
      float block01 = coefficients[1];
467
22.3M
      float block10 = coefficients[8];
468
22.3M
      float block11 = coefficients[9];
469
22.3M
      dcs[0] = block00 + block01 + block10 + block11;
470
22.3M
      dcs[1] = block00 + block01 - block10 - block11;
471
22.3M
      dcs[2] = block00 - block01 + block10 - block11;
472
22.3M
      dcs[3] = block00 - block01 - block10 + block11;
473
67.1M
      for (size_t y = 0; y < 2; y++) {
474
134M
        for (size_t x = 0; x < 2; x++) {
475
89.5M
          float block_dc = dcs[y * 2 + x];
476
89.5M
          float residual_sum = 0;
477
447M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.79G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.43G
              if (ix == 0 && iy == 0) continue;
480
1.34G
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.34G
            }
482
358M
          }
483
89.5M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
89.5M
              block_dc - residual_sum * (1.0f / 16);
485
447M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.79G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.43G
              if (ix == 1 && iy == 1) continue;
488
1.34G
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.34G
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.34G
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.34G
            }
492
358M
          }
493
89.5M
          pixels[y * 4 * pixels_stride + x * 4] =
494
89.5M
              coefficients[(y + 2) * 8 + x + 2] +
495
89.5M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
89.5M
        }
497
44.7M
      }
498
22.3M
      break;
499
0
    }
500
20.5M
    case Type::DCT8X4: {
501
20.5M
      float dcs[2] = {};
502
20.5M
      float block0 = coefficients[0];
503
20.5M
      float block1 = coefficients[8];
504
20.5M
      dcs[0] = block0 + block1;
505
20.5M
      dcs[1] = block0 - block1;
506
61.5M
      for (size_t x = 0; x < 2; x++) {
507
41.0M
        HWY_ALIGN float block[4 * 8];
508
41.0M
        block[0] = dcs[x];
509
205M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.47G
          for (size_t ix = 0; ix < 8; ix++) {
511
1.31G
            if (ix == 0 && iy == 0) continue;
512
1.27G
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
1.27G
          }
514
164M
        }
515
41.0M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
41.0M
                                  scratch_space);
517
41.0M
      }
518
20.5M
      break;
519
0
    }
520
20.0M
    case Type::DCT4X8: {
521
20.0M
      float dcs[2] = {};
522
20.0M
      float block0 = coefficients[0];
523
20.0M
      float block1 = coefficients[8];
524
20.0M
      dcs[0] = block0 + block1;
525
20.0M
      dcs[1] = block0 - block1;
526
60.0M
      for (size_t y = 0; y < 2; y++) {
527
40.0M
        HWY_ALIGN float block[4 * 8];
528
40.0M
        block[0] = dcs[y];
529
200M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.44G
          for (size_t ix = 0; ix < 8; ix++) {
531
1.28G
            if (ix == 0 && iy == 0) continue;
532
1.24G
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
1.24G
          }
534
160M
        }
535
40.0M
        ComputeScaledIDCT<4, 8>()(
536
40.0M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
40.0M
            scratch_space);
538
40.0M
      }
539
20.0M
      break;
540
0
    }
541
18.8M
    case Type::DCT4X4: {
542
18.8M
      float dcs[4] = {};
543
18.8M
      float block00 = coefficients[0];
544
18.8M
      float block01 = coefficients[1];
545
18.8M
      float block10 = coefficients[8];
546
18.8M
      float block11 = coefficients[9];
547
18.8M
      dcs[0] = block00 + block01 + block10 + block11;
548
18.8M
      dcs[1] = block00 + block01 - block10 - block11;
549
18.8M
      dcs[2] = block00 - block01 + block10 - block11;
550
18.8M
      dcs[3] = block00 - block01 - block10 + block11;
551
56.6M
      for (size_t y = 0; y < 2; y++) {
552
113M
        for (size_t x = 0; x < 2; x++) {
553
75.4M
          HWY_ALIGN float block[4 * 4];
554
75.4M
          block[0] = dcs[y * 2 + x];
555
377M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
557
1.20G
              if (ix == 0 && iy == 0) continue;
558
1.13G
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
1.13G
            }
560
301M
          }
561
75.4M
          ComputeScaledIDCT<4, 4>()(
562
75.4M
              block,
563
75.4M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
75.4M
              scratch_space);
565
75.4M
        }
566
37.7M
      }
567
18.8M
      break;
568
0
    }
569
22.3M
    case Type::DCT2X2: {
570
22.3M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
22.3M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
22.3M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
22.3M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
22.3M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
201M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.61G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.43G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.43G
        }
579
179M
      }
580
22.3M
      break;
581
0
    }
582
8.40M
    case Type::DCT16X16: {
583
8.40M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
8.40M
                                  scratch_space);
585
8.40M
      break;
586
0
    }
587
16.3M
    case Type::DCT16X8: {
588
16.3M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
16.3M
                                 scratch_space);
590
16.3M
      break;
591
0
    }
592
17.0M
    case Type::DCT8X16: {
593
17.0M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
17.0M
                                 scratch_space);
595
17.0M
      break;
596
0
    }
597
4.82k
    case Type::DCT32X8: {
598
4.82k
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
4.82k
                                 scratch_space);
600
4.82k
      break;
601
0
    }
602
7.75k
    case Type::DCT8X32: {
603
7.75k
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
7.75k
                                 scratch_space);
605
7.75k
      break;
606
0
    }
607
3.30M
    case Type::DCT32X16: {
608
3.30M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
3.30M
                                  scratch_space);
610
3.30M
      break;
611
0
    }
612
3.51M
    case Type::DCT16X32: {
613
3.51M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
3.51M
                                  scratch_space);
615
3.51M
      break;
616
0
    }
617
2.28M
    case Type::DCT32X32: {
618
2.28M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
2.28M
                                  scratch_space);
620
2.28M
      break;
621
0
    }
622
30.3M
    case Type::DCT: {
623
30.3M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
30.3M
                                scratch_space);
625
30.3M
      break;
626
0
    }
627
19.8M
    case Type::AFV0: {
628
19.8M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
19.8M
      break;
630
0
    }
631
20.8M
    case Type::AFV1: {
632
20.8M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
20.8M
      break;
634
0
    }
635
19.6M
    case Type::AFV2: {
636
19.6M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
19.6M
      break;
638
0
    }
639
20.1M
    case Type::AFV3: {
640
20.1M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
20.1M
      break;
642
0
    }
643
989k
    case Type::DCT64X32: {
644
989k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
989k
                                  scratch_space);
646
989k
      break;
647
0
    }
648
696k
    case Type::DCT32X64: {
649
696k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
696k
                                  scratch_space);
651
696k
      break;
652
0
    }
653
385k
    case Type::DCT64X64: {
654
385k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
385k
                                  scratch_space);
656
385k
      break;
657
0
    }
658
54
    case Type::DCT128X64: {
659
54
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
54
                                   scratch_space);
661
54
      break;
662
0
    }
663
12
    case Type::DCT64X128: {
664
12
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
12
                                   scratch_space);
666
12
      break;
667
0
    }
668
24
    case Type::DCT128X128: {
669
24
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
24
                                    scratch_space);
671
24
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
30
    case Type::DCT256X256: {
684
30
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
30
                                    scratch_space);
686
30
      break;
687
0
    }
688
267M
  }
689
267M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
31.9M
                                        float* scratch_space) {
461
31.9M
  using Type = AcStrategyType;
462
31.9M
  switch (strategy) {
463
3.56M
    case Type::IDENTITY: {
464
3.56M
      float dcs[4] = {};
465
3.56M
      float block00 = coefficients[0];
466
3.56M
      float block01 = coefficients[1];
467
3.56M
      float block10 = coefficients[8];
468
3.56M
      float block11 = coefficients[9];
469
3.56M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.56M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.56M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.56M
      dcs[3] = block00 - block01 - block10 + block11;
473
10.6M
      for (size_t y = 0; y < 2; y++) {
474
21.3M
        for (size_t x = 0; x < 2; x++) {
475
14.2M
          float block_dc = dcs[y * 2 + x];
476
14.2M
          float residual_sum = 0;
477
71.3M
          for (size_t iy = 0; iy < 4; iy++) {
478
285M
            for (size_t ix = 0; ix < 4; ix++) {
479
228M
              if (ix == 0 && iy == 0) continue;
480
213M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
213M
            }
482
57.0M
          }
483
14.2M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
14.2M
              block_dc - residual_sum * (1.0f / 16);
485
71.3M
          for (size_t iy = 0; iy < 4; iy++) {
486
285M
            for (size_t ix = 0; ix < 4; ix++) {
487
228M
              if (ix == 1 && iy == 1) continue;
488
213M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
213M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
213M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
213M
            }
492
57.0M
          }
493
14.2M
          pixels[y * 4 * pixels_stride + x * 4] =
494
14.2M
              coefficients[(y + 2) * 8 + x + 2] +
495
14.2M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
14.2M
        }
497
7.13M
      }
498
3.56M
      break;
499
0
    }
500
1.68M
    case Type::DCT8X4: {
501
1.68M
      float dcs[2] = {};
502
1.68M
      float block0 = coefficients[0];
503
1.68M
      float block1 = coefficients[8];
504
1.68M
      dcs[0] = block0 + block1;
505
1.68M
      dcs[1] = block0 - block1;
506
5.06M
      for (size_t x = 0; x < 2; x++) {
507
3.37M
        HWY_ALIGN float block[4 * 8];
508
3.37M
        block[0] = dcs[x];
509
16.8M
        for (size_t iy = 0; iy < 4; iy++) {
510
121M
          for (size_t ix = 0; ix < 8; ix++) {
511
108M
            if (ix == 0 && iy == 0) continue;
512
104M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
104M
          }
514
13.5M
        }
515
3.37M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
3.37M
                                  scratch_space);
517
3.37M
      }
518
1.68M
      break;
519
0
    }
520
1.21M
    case Type::DCT4X8: {
521
1.21M
      float dcs[2] = {};
522
1.21M
      float block0 = coefficients[0];
523
1.21M
      float block1 = coefficients[8];
524
1.21M
      dcs[0] = block0 + block1;
525
1.21M
      dcs[1] = block0 - block1;
526
3.63M
      for (size_t y = 0; y < 2; y++) {
527
2.42M
        HWY_ALIGN float block[4 * 8];
528
2.42M
        block[0] = dcs[y];
529
12.1M
        for (size_t iy = 0; iy < 4; iy++) {
530
87.1M
          for (size_t ix = 0; ix < 8; ix++) {
531
77.4M
            if (ix == 0 && iy == 0) continue;
532
75.0M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
75.0M
          }
534
9.68M
        }
535
2.42M
        ComputeScaledIDCT<4, 8>()(
536
2.42M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.42M
            scratch_space);
538
2.42M
      }
539
1.21M
      break;
540
0
    }
541
51.2k
    case Type::DCT4X4: {
542
51.2k
      float dcs[4] = {};
543
51.2k
      float block00 = coefficients[0];
544
51.2k
      float block01 = coefficients[1];
545
51.2k
      float block10 = coefficients[8];
546
51.2k
      float block11 = coefficients[9];
547
51.2k
      dcs[0] = block00 + block01 + block10 + block11;
548
51.2k
      dcs[1] = block00 + block01 - block10 - block11;
549
51.2k
      dcs[2] = block00 - block01 + block10 - block11;
550
51.2k
      dcs[3] = block00 - block01 - block10 + block11;
551
153k
      for (size_t y = 0; y < 2; y++) {
552
307k
        for (size_t x = 0; x < 2; x++) {
553
204k
          HWY_ALIGN float block[4 * 4];
554
204k
          block[0] = dcs[y * 2 + x];
555
1.02M
          for (size_t iy = 0; iy < 4; iy++) {
556
4.09M
            for (size_t ix = 0; ix < 4; ix++) {
557
3.27M
              if (ix == 0 && iy == 0) continue;
558
3.07M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
3.07M
            }
560
819k
          }
561
204k
          ComputeScaledIDCT<4, 4>()(
562
204k
              block,
563
204k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
204k
              scratch_space);
565
204k
        }
566
102k
      }
567
51.2k
      break;
568
0
    }
569
3.56M
    case Type::DCT2X2: {
570
3.56M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
3.56M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
3.56M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
3.56M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
3.56M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
32.0M
      for (size_t y = 0; y < kBlockDim; y++) {
576
256M
        for (size_t x = 0; x < kBlockDim; x++) {
577
228M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
228M
        }
579
28.5M
      }
580
3.56M
      break;
581
0
    }
582
608k
    case Type::DCT16X16: {
583
608k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
608k
                                  scratch_space);
585
608k
      break;
586
0
    }
587
1.09M
    case Type::DCT16X8: {
588
1.09M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
1.09M
                                 scratch_space);
590
1.09M
      break;
591
0
    }
592
1.69M
    case Type::DCT8X16: {
593
1.69M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
1.69M
                                 scratch_space);
595
1.69M
      break;
596
0
    }
597
4.82k
    case Type::DCT32X8: {
598
4.82k
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
4.82k
                                 scratch_space);
600
4.82k
      break;
601
0
    }
602
7.75k
    case Type::DCT8X32: {
603
7.75k
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
7.75k
                                 scratch_space);
605
7.75k
      break;
606
0
    }
607
309k
    case Type::DCT32X16: {
608
309k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
309k
                                  scratch_space);
610
309k
      break;
611
0
    }
612
494k
    case Type::DCT16X32: {
613
494k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
494k
                                  scratch_space);
615
494k
      break;
616
0
    }
617
756k
    case Type::DCT32X32: {
618
756k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
756k
                                  scratch_space);
620
756k
      break;
621
0
    }
622
11.5M
    case Type::DCT: {
623
11.5M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
11.5M
                                scratch_space);
625
11.5M
      break;
626
0
    }
627
999k
    case Type::AFV0: {
628
999k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
999k
      break;
630
0
    }
631
2.06M
    case Type::AFV1: {
632
2.06M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
2.06M
      break;
634
0
    }
635
858k
    case Type::AFV2: {
636
858k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
858k
      break;
638
0
    }
639
1.28M
    case Type::AFV3: {
640
1.28M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
1.28M
      break;
642
0
    }
643
20.9k
    case Type::DCT64X32: {
644
20.9k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
20.9k
                                  scratch_space);
646
20.9k
      break;
647
0
    }
648
35.1k
    case Type::DCT32X64: {
649
35.1k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
35.1k
                                  scratch_space);
651
35.1k
      break;
652
0
    }
653
132k
    case Type::DCT64X64: {
654
132k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
132k
                                  scratch_space);
656
132k
      break;
657
0
    }
658
54
    case Type::DCT128X64: {
659
54
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
54
                                   scratch_space);
661
54
      break;
662
0
    }
663
12
    case Type::DCT64X128: {
664
12
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
12
                                   scratch_space);
666
12
      break;
667
0
    }
668
24
    case Type::DCT128X128: {
669
24
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
24
                                    scratch_space);
671
24
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
30
    case Type::DCT256X256: {
684
30
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
30
                                    scratch_space);
686
30
      break;
687
0
    }
688
31.9M
  }
689
31.9M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
235M
                                        float* scratch_space) {
461
235M
  using Type = AcStrategyType;
462
235M
  switch (strategy) {
463
18.8M
    case Type::IDENTITY: {
464
18.8M
      float dcs[4] = {};
465
18.8M
      float block00 = coefficients[0];
466
18.8M
      float block01 = coefficients[1];
467
18.8M
      float block10 = coefficients[8];
468
18.8M
      float block11 = coefficients[9];
469
18.8M
      dcs[0] = block00 + block01 + block10 + block11;
470
18.8M
      dcs[1] = block00 + block01 - block10 - block11;
471
18.8M
      dcs[2] = block00 - block01 + block10 - block11;
472
18.8M
      dcs[3] = block00 - block01 - block10 + block11;
473
56.4M
      for (size_t y = 0; y < 2; y++) {
474
112M
        for (size_t x = 0; x < 2; x++) {
475
75.2M
          float block_dc = dcs[y * 2 + x];
476
75.2M
          float residual_sum = 0;
477
376M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.20G
              if (ix == 0 && iy == 0) continue;
480
1.12G
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.12G
            }
482
301M
          }
483
75.2M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
75.2M
              block_dc - residual_sum * (1.0f / 16);
485
376M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.20G
              if (ix == 1 && iy == 1) continue;
488
1.12G
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.12G
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.12G
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.12G
            }
492
301M
          }
493
75.2M
          pixels[y * 4 * pixels_stride + x * 4] =
494
75.2M
              coefficients[(y + 2) * 8 + x + 2] +
495
75.2M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
75.2M
        }
497
37.6M
      }
498
18.8M
      break;
499
0
    }
500
18.8M
    case Type::DCT8X4: {
501
18.8M
      float dcs[2] = {};
502
18.8M
      float block0 = coefficients[0];
503
18.8M
      float block1 = coefficients[8];
504
18.8M
      dcs[0] = block0 + block1;
505
18.8M
      dcs[1] = block0 - block1;
506
56.4M
      for (size_t x = 0; x < 2; x++) {
507
37.6M
        HWY_ALIGN float block[4 * 8];
508
37.6M
        block[0] = dcs[x];
509
188M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.35G
          for (size_t ix = 0; ix < 8; ix++) {
511
1.20G
            if (ix == 0 && iy == 0) continue;
512
1.16G
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
1.16G
          }
514
150M
        }
515
37.6M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
37.6M
                                  scratch_space);
517
37.6M
      }
518
18.8M
      break;
519
0
    }
520
18.8M
    case Type::DCT4X8: {
521
18.8M
      float dcs[2] = {};
522
18.8M
      float block0 = coefficients[0];
523
18.8M
      float block1 = coefficients[8];
524
18.8M
      dcs[0] = block0 + block1;
525
18.8M
      dcs[1] = block0 - block1;
526
56.4M
      for (size_t y = 0; y < 2; y++) {
527
37.6M
        HWY_ALIGN float block[4 * 8];
528
37.6M
        block[0] = dcs[y];
529
188M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.35G
          for (size_t ix = 0; ix < 8; ix++) {
531
1.20G
            if (ix == 0 && iy == 0) continue;
532
1.16G
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
1.16G
          }
534
150M
        }
535
37.6M
        ComputeScaledIDCT<4, 8>()(
536
37.6M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
37.6M
            scratch_space);
538
37.6M
      }
539
18.8M
      break;
540
0
    }
541
18.8M
    case Type::DCT4X4: {
542
18.8M
      float dcs[4] = {};
543
18.8M
      float block00 = coefficients[0];
544
18.8M
      float block01 = coefficients[1];
545
18.8M
      float block10 = coefficients[8];
546
18.8M
      float block11 = coefficients[9];
547
18.8M
      dcs[0] = block00 + block01 + block10 + block11;
548
18.8M
      dcs[1] = block00 + block01 - block10 - block11;
549
18.8M
      dcs[2] = block00 - block01 + block10 - block11;
550
18.8M
      dcs[3] = block00 - block01 - block10 + block11;
551
56.4M
      for (size_t y = 0; y < 2; y++) {
552
112M
        for (size_t x = 0; x < 2; x++) {
553
75.2M
          HWY_ALIGN float block[4 * 4];
554
75.2M
          block[0] = dcs[y * 2 + x];
555
376M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.50G
            for (size_t ix = 0; ix < 4; ix++) {
557
1.20G
              if (ix == 0 && iy == 0) continue;
558
1.12G
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
1.12G
            }
560
301M
          }
561
75.2M
          ComputeScaledIDCT<4, 4>()(
562
75.2M
              block,
563
75.2M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
75.2M
              scratch_space);
565
75.2M
        }
566
37.6M
      }
567
18.8M
      break;
568
0
    }
569
18.8M
    case Type::DCT2X2: {
570
18.8M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
18.8M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
18.8M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
18.8M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
18.8M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
169M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.35G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.20G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.20G
        }
579
150M
      }
580
18.8M
      break;
581
0
    }
582
7.79M
    case Type::DCT16X16: {
583
7.79M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
7.79M
                                  scratch_space);
585
7.79M
      break;
586
0
    }
587
15.2M
    case Type::DCT16X8: {
588
15.2M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
15.2M
                                 scratch_space);
590
15.2M
      break;
591
0
    }
592
15.3M
    case Type::DCT8X16: {
593
15.3M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
15.3M
                                 scratch_space);
595
15.3M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
2.99M
    case Type::DCT32X16: {
608
2.99M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.99M
                                  scratch_space);
610
2.99M
      break;
611
0
    }
612
3.01M
    case Type::DCT16X32: {
613
3.01M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
3.01M
                                  scratch_space);
615
3.01M
      break;
616
0
    }
617
1.53M
    case Type::DCT32X32: {
618
1.53M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.53M
                                  scratch_space);
620
1.53M
      break;
621
0
    }
622
18.8M
    case Type::DCT: {
623
18.8M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
18.8M
                                scratch_space);
625
18.8M
      break;
626
0
    }
627
18.8M
    case Type::AFV0: {
628
18.8M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
18.8M
      break;
630
0
    }
631
18.8M
    case Type::AFV1: {
632
18.8M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
18.8M
      break;
634
0
    }
635
18.8M
    case Type::AFV2: {
636
18.8M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
18.8M
      break;
638
0
    }
639
18.8M
    case Type::AFV3: {
640
18.8M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
18.8M
      break;
642
0
    }
643
968k
    case Type::DCT64X32: {
644
968k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
968k
                                  scratch_space);
646
968k
      break;
647
0
    }
648
660k
    case Type::DCT32X64: {
649
660k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
660k
                                  scratch_space);
651
660k
      break;
652
0
    }
653
253k
    case Type::DCT64X64: {
654
253k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
253k
                                  scratch_space);
656
253k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
235M
  }
689
235M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
31.9M
                                              float* JXL_RESTRICT scratch) {
695
31.9M
  using Type = AcStrategyType;
696
31.9M
  HWY_ALIGN float warm_block[4 * 4];
697
31.9M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
31.9M
  switch (strategy) {
699
1.09M
    case Type::DCT16X8: {
700
1.09M
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
1.09M
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
1.09M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
1.09M
      break;
704
0
    }
705
1.69M
    case Type::DCT8X16: {
706
1.69M
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
1.69M
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
1.69M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
1.69M
      break;
710
0
    }
711
608k
    case Type::DCT16X16: {
712
608k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
608k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
608k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
608k
      break;
716
0
    }
717
4.82k
    case Type::DCT32X8: {
718
4.82k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
4.82k
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
4.82k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
4.82k
      break;
722
0
    }
723
7.75k
    case Type::DCT8X32: {
724
7.75k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
7.75k
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
7.75k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
7.75k
      break;
728
0
    }
729
309k
    case Type::DCT32X16: {
730
309k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
309k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
309k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
309k
      break;
734
0
    }
735
494k
    case Type::DCT16X32: {
736
494k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
494k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
494k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
494k
      break;
740
0
    }
741
756k
    case Type::DCT32X32: {
742
756k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
756k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
756k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
756k
      break;
746
0
    }
747
20.9k
    case Type::DCT64X32: {
748
20.9k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
20.9k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
20.9k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
20.9k
      break;
752
0
    }
753
35.1k
    case Type::DCT32X64: {
754
35.1k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
35.1k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
35.1k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
35.1k
      break;
758
0
    }
759
132k
    case Type::DCT64X64: {
760
132k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
132k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
132k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
132k
      break;
764
0
    }
765
54
    case Type::DCT128X64: {
766
54
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
54
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
54
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
54
      break;
770
0
    }
771
12
    case Type::DCT64X128: {
772
12
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
12
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
12
      break;
776
0
    }
777
24
    case Type::DCT128X128: {
778
24
      ReinterpretingDCT<
779
24
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
24
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
24
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
24
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
30
    case Type::DCT256X256: {
799
30
      ReinterpretingDCT<
800
30
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
30
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
30
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
30
      break;
804
0
    }
805
11.5M
    case Type::DCT:
806
15.0M
    case Type::DCT2X2:
807
15.1M
    case Type::DCT4X4:
808
16.3M
    case Type::DCT4X8:
809
18.0M
    case Type::DCT8X4:
810
19.0M
    case Type::AFV0:
811
21.0M
    case Type::AFV1:
812
21.9M
    case Type::AFV2:
813
23.2M
    case Type::AFV3:
814
26.8M
    case Type::IDENTITY:
815
26.8M
      llf[0] = dc[0];
816
26.8M
      break;
817
31.9M
  };
818
31.9M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
31.9M
                                              float* JXL_RESTRICT scratch) {
695
31.9M
  using Type = AcStrategyType;
696
31.9M
  HWY_ALIGN float warm_block[4 * 4];
697
31.9M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
31.9M
  switch (strategy) {
699
1.09M
    case Type::DCT16X8: {
700
1.09M
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
1.09M
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
1.09M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
1.09M
      break;
704
0
    }
705
1.69M
    case Type::DCT8X16: {
706
1.69M
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
1.69M
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
1.69M
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
1.69M
      break;
710
0
    }
711
608k
    case Type::DCT16X16: {
712
608k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
608k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
608k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
608k
      break;
716
0
    }
717
4.82k
    case Type::DCT32X8: {
718
4.82k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
4.82k
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
4.82k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
4.82k
      break;
722
0
    }
723
7.75k
    case Type::DCT8X32: {
724
7.75k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
7.75k
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
7.75k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
7.75k
      break;
728
0
    }
729
309k
    case Type::DCT32X16: {
730
309k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
309k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
309k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
309k
      break;
734
0
    }
735
494k
    case Type::DCT16X32: {
736
494k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
494k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
494k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
494k
      break;
740
0
    }
741
756k
    case Type::DCT32X32: {
742
756k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
756k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
756k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
756k
      break;
746
0
    }
747
20.9k
    case Type::DCT64X32: {
748
20.9k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
20.9k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
20.9k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
20.9k
      break;
752
0
    }
753
35.1k
    case Type::DCT32X64: {
754
35.1k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
35.1k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
35.1k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
35.1k
      break;
758
0
    }
759
132k
    case Type::DCT64X64: {
760
132k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
132k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
132k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
132k
      break;
764
0
    }
765
54
    case Type::DCT128X64: {
766
54
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
54
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
54
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
54
      break;
770
0
    }
771
12
    case Type::DCT64X128: {
772
12
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
12
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
12
      break;
776
0
    }
777
24
    case Type::DCT128X128: {
778
24
      ReinterpretingDCT<
779
24
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
24
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
24
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
24
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
30
    case Type::DCT256X256: {
799
30
      ReinterpretingDCT<
800
30
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
30
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
30
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
30
      break;
804
0
    }
805
11.5M
    case Type::DCT:
806
15.0M
    case Type::DCT2X2:
807
15.1M
    case Type::DCT4X4:
808
16.3M
    case Type::DCT4X8:
809
18.0M
    case Type::DCT8X4:
810
19.0M
    case Type::AFV0:
811
21.0M
    case Type::AFV1:
812
21.9M
    case Type::AFV2:
813
23.2M
    case Type::AFV3:
814
26.8M
    case Type::IDENTITY:
815
26.8M
      llf[0] = dc[0];
816
26.8M
      break;
817
31.9M
  };
818
31.9M
}
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_