Coverage Report

Created: 2026-03-31 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.71M
                                  float* JXL_RESTRICT scratch_space) {
41
2.71M
  static_assert(LF_ROWS == ROWS,
42
2.71M
                "ReinterpretingDCT should only be called with LF == N");
43
2.71M
  static_assert(LF_COLS == COLS,
44
2.71M
                "ReinterpretingDCT should only be called with LF == N");
45
2.71M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.71M
                                 scratch_space);
47
2.71M
  if (ROWS < COLS) {
48
2.05M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.47M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.34M
        output[y * output_stride + x] =
51
3.34M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.34M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.34M
      }
54
1.13M
    }
55
1.79M
  } else {
56
6.18M
    for (size_t y = 0; y < LF_COLS; y++) {
57
23.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
19.4M
        output[y * output_stride + x] =
59
19.4M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
19.4M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
19.4M
      }
62
4.39M
    }
63
1.79M
  }
64
2.71M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
691k
                                  float* JXL_RESTRICT scratch_space) {
41
691k
  static_assert(LF_ROWS == ROWS,
42
691k
                "ReinterpretingDCT should only be called with LF == N");
43
691k
  static_assert(LF_COLS == COLS,
44
691k
                "ReinterpretingDCT should only be called with LF == N");
45
691k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
691k
                                 scratch_space);
47
691k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
691k
  } else {
56
1.38M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.07M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.38M
        output[y * output_stride + x] =
59
1.38M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.38M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.38M
      }
62
691k
    }
63
691k
  }
64
691k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
742k
                                  float* JXL_RESTRICT scratch_space) {
41
742k
  static_assert(LF_ROWS == ROWS,
42
742k
                "ReinterpretingDCT should only be called with LF == N");
43
742k
  static_assert(LF_COLS == COLS,
44
742k
                "ReinterpretingDCT should only be called with LF == N");
45
742k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
742k
                                 scratch_space);
47
742k
  if (ROWS < COLS) {
48
1.48M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.22M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.48M
        output[y * output_stride + x] =
51
1.48M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.48M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.48M
      }
54
742k
    }
55
742k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
742k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
499k
                                  float* JXL_RESTRICT scratch_space) {
41
499k
  static_assert(LF_ROWS == ROWS,
42
499k
                "ReinterpretingDCT should only be called with LF == N");
43
499k
  static_assert(LF_COLS == COLS,
44
499k
                "ReinterpretingDCT should only be called with LF == N");
45
499k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
499k
                                 scratch_space);
47
499k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
499k
  } else {
56
1.49M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.99M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.99M
        output[y * output_stride + x] =
59
1.99M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.99M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.99M
      }
62
999k
    }
63
499k
  }
64
499k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
42
                                  float* JXL_RESTRICT scratch_space) {
41
42
  static_assert(LF_ROWS == ROWS,
42
42
                "ReinterpretingDCT should only be called with LF == N");
43
42
  static_assert(LF_COLS == COLS,
44
42
                "ReinterpretingDCT should only be called with LF == N");
45
42
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
42
                                 scratch_space);
47
42
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
42
  } else {
56
84
    for (size_t y = 0; y < LF_COLS; y++) {
57
210
      for (size_t x = 0; x < LF_ROWS; x++) {
58
168
        output[y * output_stride + x] =
59
168
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
168
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
168
      }
62
42
    }
63
42
  }
64
42
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
132
                                  float* JXL_RESTRICT scratch_space) {
41
132
  static_assert(LF_ROWS == ROWS,
42
132
                "ReinterpretingDCT should only be called with LF == N");
43
132
  static_assert(LF_COLS == COLS,
44
132
                "ReinterpretingDCT should only be called with LF == N");
45
132
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
132
                                 scratch_space);
47
132
  if (ROWS < COLS) {
48
264
    for (size_t y = 0; y < LF_ROWS; y++) {
49
660
      for (size_t x = 0; x < LF_COLS; x++) {
50
528
        output[y * output_stride + x] =
51
528
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
528
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
528
      }
54
132
    }
55
132
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
132
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
153k
                                  float* JXL_RESTRICT scratch_space) {
41
153k
  static_assert(LF_ROWS == ROWS,
42
153k
                "ReinterpretingDCT should only be called with LF == N");
43
153k
  static_assert(LF_COLS == COLS,
44
153k
                "ReinterpretingDCT should only be called with LF == N");
45
153k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
153k
                                 scratch_space);
47
153k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
153k
  } else {
56
461k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.53M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.23M
        output[y * output_stride + x] =
59
1.23M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.23M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.23M
      }
62
307k
    }
63
153k
  }
64
153k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
158k
                                  float* JXL_RESTRICT scratch_space) {
41
158k
  static_assert(LF_ROWS == ROWS,
42
158k
                "ReinterpretingDCT should only be called with LF == N");
43
158k
  static_assert(LF_COLS == COLS,
44
158k
                "ReinterpretingDCT should only be called with LF == N");
45
158k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
158k
                                 scratch_space);
47
158k
  if (ROWS < COLS) {
48
474k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.58M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.26M
        output[y * output_stride + x] =
51
1.26M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.26M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.26M
      }
54
316k
    }
55
158k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
158k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
270k
                                  float* JXL_RESTRICT scratch_space) {
41
270k
  static_assert(LF_ROWS == ROWS,
42
270k
                "ReinterpretingDCT should only be called with LF == N");
43
270k
  static_assert(LF_COLS == COLS,
44
270k
                "ReinterpretingDCT should only be called with LF == N");
45
270k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
270k
                                 scratch_space);
47
270k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
270k
  } else {
56
1.35M
    for (size_t y = 0; y < LF_COLS; y++) {
57
5.41M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.33M
        output[y * output_stride + x] =
59
4.33M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.33M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.33M
      }
62
1.08M
    }
63
270k
  }
64
270k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
35.4k
                                  float* JXL_RESTRICT scratch_space) {
41
35.4k
  static_assert(LF_ROWS == ROWS,
42
35.4k
                "ReinterpretingDCT should only be called with LF == N");
43
35.4k
  static_assert(LF_COLS == COLS,
44
35.4k
                "ReinterpretingDCT should only be called with LF == N");
45
35.4k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
35.4k
                                 scratch_space);
47
35.4k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
35.4k
  } else {
56
177k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.27M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.13M
        output[y * output_stride + x] =
59
1.13M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.13M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.13M
      }
62
141k
    }
63
35.4k
  }
64
35.4k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
18.6k
                                  float* JXL_RESTRICT scratch_space) {
41
18.6k
  static_assert(LF_ROWS == ROWS,
42
18.6k
                "ReinterpretingDCT should only be called with LF == N");
43
18.6k
  static_assert(LF_COLS == COLS,
44
18.6k
                "ReinterpretingDCT should only be called with LF == N");
45
18.6k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
18.6k
                                 scratch_space);
47
18.6k
  if (ROWS < COLS) {
48
93.1k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
670k
      for (size_t x = 0; x < LF_COLS; x++) {
50
595k
        output[y * output_stride + x] =
51
595k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
595k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
595k
      }
54
74.4k
    }
55
18.6k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
18.6k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
145k
                                  float* JXL_RESTRICT scratch_space) {
41
145k
  static_assert(LF_ROWS == ROWS,
42
145k
                "ReinterpretingDCT should only be called with LF == N");
43
145k
  static_assert(LF_COLS == COLS,
44
145k
                "ReinterpretingDCT should only be called with LF == N");
45
145k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
145k
                                 scratch_space);
47
145k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
145k
  } else {
56
1.31M
    for (size_t y = 0; y < LF_COLS; y++) {
57
10.4M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
9.32M
        output[y * output_stride + x] =
59
9.32M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
9.32M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
9.32M
      }
62
1.16M
    }
63
145k
  }
64
145k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
27
    for (size_t y = 0; y < LF_ROWS; y++) {
49
408
      for (size_t x = 0; x < LF_COLS; x++) {
50
384
        output[y * output_stride + x] =
51
384
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
384
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
384
      }
54
24
    }
55
3
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
12
                                  float* JXL_RESTRICT scratch_space) {
41
12
  static_assert(LF_ROWS == ROWS,
42
12
                "ReinterpretingDCT should only be called with LF == N");
43
12
  static_assert(LF_COLS == COLS,
44
12
                "ReinterpretingDCT should only be called with LF == N");
45
12
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
12
                                 scratch_space);
47
12
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
12
  } else {
56
204
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.26k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
3.07k
        output[y * output_stride + x] =
59
3.07k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
3.07k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
3.07k
      }
62
192
    }
63
12
  }
64
12
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
55.5M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
55.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
55.5M
  static_assert(S % 2 == 0, "S should be even");
70
55.5M
  float temp[kDCTBlockSize];
71
55.5M
  constexpr size_t num_2x2 = S / 2;
72
185M
  for (size_t y = 0; y < num_2x2; y++) {
73
518M
    for (size_t x = 0; x < num_2x2; x++) {
74
389M
      float c00 = block[y * kBlockDim + x];
75
389M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
389M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
389M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
389M
      float r00 = c00 + c01 + c10 + c11;
79
389M
      float r01 = c00 + c01 - c10 - c11;
80
389M
      float r10 = c00 - c01 + c10 - c11;
81
389M
      float r11 = c00 - c01 - c10 + c11;
82
389M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
389M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
389M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
389M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
389M
    }
87
129M
  }
88
314M
  for (size_t y = 0; y < S; y++) {
89
1.81G
    for (size_t x = 0; x < S; x++) {
90
1.55G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.55G
    }
92
259M
  }
93
55.5M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
24.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
24.7M
    for (size_t x = 0; x < num_2x2; x++) {
74
12.3M
      float c00 = block[y * kBlockDim + x];
75
12.3M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
12.3M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
12.3M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
12.3M
      float r00 = c00 + c01 + c10 + c11;
79
12.3M
      float r01 = c00 + c01 - c10 - c11;
80
12.3M
      float r10 = c00 - c01 + c10 - c11;
81
12.3M
      float r11 = c00 - c01 - c10 + c11;
82
12.3M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
12.3M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
12.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
12.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
12.3M
    }
87
12.3M
  }
88
37.1M
  for (size_t y = 0; y < S; y++) {
89
74.3M
    for (size_t x = 0; x < S; x++) {
90
49.5M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
49.5M
    }
92
24.7M
  }
93
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
37.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
74.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
49.5M
      float c00 = block[y * kBlockDim + x];
75
49.5M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
49.5M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
49.5M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
49.5M
      float r00 = c00 + c01 + c10 + c11;
79
49.5M
      float r01 = c00 + c01 - c10 - c11;
80
49.5M
      float r10 = c00 - c01 + c10 - c11;
81
49.5M
      float r11 = c00 - c01 - c10 + c11;
82
49.5M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
49.5M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
49.5M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
49.5M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
49.5M
    }
87
24.7M
  }
88
61.9M
  for (size_t y = 0; y < S; y++) {
89
247M
    for (size_t x = 0; x < S; x++) {
90
198M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
198M
    }
92
49.5M
  }
93
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
61.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
247M
    for (size_t x = 0; x < num_2x2; x++) {
74
198M
      float c00 = block[y * kBlockDim + x];
75
198M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
198M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
198M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
198M
      float r00 = c00 + c01 + c10 + c11;
79
198M
      float r01 = c00 + c01 - c10 - c11;
80
198M
      float r10 = c00 - c01 + c10 - c11;
81
198M
      float r11 = c00 - c01 - c10 + c11;
82
198M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
198M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
198M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
198M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
198M
    }
87
49.5M
  }
88
111M
  for (size_t y = 0; y < S; y++) {
89
891M
    for (size_t x = 0; x < S; x++) {
90
792M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
792M
    }
92
99.1M
  }
93
12.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.13M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.13M
  static_assert(S % 2 == 0, "S should be even");
70
6.13M
  float temp[kDCTBlockSize];
71
6.13M
  constexpr size_t num_2x2 = S / 2;
72
12.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
6.13M
      float c00 = block[y * kBlockDim + x];
75
6.13M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
6.13M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
6.13M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
6.13M
      float r00 = c00 + c01 + c10 + c11;
79
6.13M
      float r01 = c00 + c01 - c10 - c11;
80
6.13M
      float r10 = c00 - c01 + c10 - c11;
81
6.13M
      float r11 = c00 - c01 - c10 + c11;
82
6.13M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
6.13M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
6.13M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
6.13M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
6.13M
    }
87
6.13M
  }
88
18.4M
  for (size_t y = 0; y < S; y++) {
89
36.8M
    for (size_t x = 0; x < S; x++) {
90
24.5M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
24.5M
    }
92
12.2M
  }
93
6.13M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.13M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.13M
  static_assert(S % 2 == 0, "S should be even");
70
6.13M
  float temp[kDCTBlockSize];
71
6.13M
  constexpr size_t num_2x2 = S / 2;
72
18.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
36.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
24.5M
      float c00 = block[y * kBlockDim + x];
75
24.5M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
24.5M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
24.5M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
24.5M
      float r00 = c00 + c01 + c10 + c11;
79
24.5M
      float r01 = c00 + c01 - c10 - c11;
80
24.5M
      float r10 = c00 - c01 + c10 - c11;
81
24.5M
      float r11 = c00 - c01 - c10 + c11;
82
24.5M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
24.5M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
24.5M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
24.5M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
24.5M
    }
87
12.2M
  }
88
30.6M
  for (size_t y = 0; y < S; y++) {
89
122M
    for (size_t x = 0; x < S; x++) {
90
98.1M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
98.1M
    }
92
24.5M
  }
93
6.13M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.13M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.13M
  static_assert(S % 2 == 0, "S should be even");
70
6.13M
  float temp[kDCTBlockSize];
71
6.13M
  constexpr size_t num_2x2 = S / 2;
72
30.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
122M
    for (size_t x = 0; x < num_2x2; x++) {
74
98.1M
      float c00 = block[y * kBlockDim + x];
75
98.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
98.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
98.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
98.1M
      float r00 = c00 + c01 + c10 + c11;
79
98.1M
      float r01 = c00 + c01 - c10 - c11;
80
98.1M
      float r10 = c00 - c01 + c10 - c11;
81
98.1M
      float r11 = c00 - c01 - c10 + c11;
82
98.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
98.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
98.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
98.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
98.1M
    }
87
24.5M
  }
88
55.2M
  for (size_t y = 0; y < S; y++) {
89
441M
    for (size_t x = 0; x < S; x++) {
90
392M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
392M
    }
92
49.0M
  }
93
6.13M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
50.4M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
50.4M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
50.4M
      {
98
50.4M
          0.25,
99
50.4M
          0.25,
100
50.4M
          0.25,
101
50.4M
          0.25,
102
50.4M
          0.25,
103
50.4M
          0.25,
104
50.4M
          0.25,
105
50.4M
          0.25,
106
50.4M
          0.25,
107
50.4M
          0.25,
108
50.4M
          0.25,
109
50.4M
          0.25,
110
50.4M
          0.25,
111
50.4M
          0.25,
112
50.4M
          0.25,
113
50.4M
          0.25,
114
50.4M
      },
115
50.4M
      {
116
50.4M
          0.876902929799142f,
117
50.4M
          0.2206518106944235f,
118
50.4M
          -0.10140050393753763f,
119
50.4M
          -0.1014005039375375f,
120
50.4M
          0.2206518106944236f,
121
50.4M
          -0.10140050393753777f,
122
50.4M
          -0.10140050393753772f,
123
50.4M
          -0.10140050393753763f,
124
50.4M
          -0.10140050393753758f,
125
50.4M
          -0.10140050393753769f,
126
50.4M
          -0.1014005039375375f,
127
50.4M
          -0.10140050393753768f,
128
50.4M
          -0.10140050393753768f,
129
50.4M
          -0.10140050393753759f,
130
50.4M
          -0.10140050393753763f,
131
50.4M
          -0.10140050393753741f,
132
50.4M
      },
133
50.4M
      {
134
50.4M
          0.0,
135
50.4M
          0.0,
136
50.4M
          0.40670075830260755f,
137
50.4M
          0.44444816619734445f,
138
50.4M
          0.0,
139
50.4M
          0.0,
140
50.4M
          0.19574399372042936f,
141
50.4M
          0.2929100136981264f,
142
50.4M
          -0.40670075830260716f,
143
50.4M
          -0.19574399372042872f,
144
50.4M
          0.0,
145
50.4M
          0.11379074460448091f,
146
50.4M
          -0.44444816619734384f,
147
50.4M
          -0.29291001369812636f,
148
50.4M
          -0.1137907446044814f,
149
50.4M
          0.0,
150
50.4M
      },
151
50.4M
      {
152
50.4M
          0.0,
153
50.4M
          0.0,
154
50.4M
          -0.21255748058288748f,
155
50.4M
          0.3085497062849767f,
156
50.4M
          0.0,
157
50.4M
          0.4706702258572536f,
158
50.4M
          -0.1621205195722993f,
159
50.4M
          0.0,
160
50.4M
          -0.21255748058287047f,
161
50.4M
          -0.16212051957228327f,
162
50.4M
          -0.47067022585725277f,
163
50.4M
          -0.1464291867126764f,
164
50.4M
          0.3085497062849487f,
165
50.4M
          0.0,
166
50.4M
          -0.14642918671266536f,
167
50.4M
          0.4251149611657548f,
168
50.4M
      },
169
50.4M
      {
170
50.4M
          0.0,
171
50.4M
          -0.7071067811865474f,
172
50.4M
          0.0,
173
50.4M
          0.0,
174
50.4M
          0.7071067811865476f,
175
50.4M
          0.0,
176
50.4M
          0.0,
177
50.4M
          0.0,
178
50.4M
          0.0,
179
50.4M
          0.0,
180
50.4M
          0.0,
181
50.4M
          0.0,
182
50.4M
          0.0,
183
50.4M
          0.0,
184
50.4M
          0.0,
185
50.4M
          0.0,
186
50.4M
      },
187
50.4M
      {
188
50.4M
          -0.4105377591765233f,
189
50.4M
          0.6235485373547691f,
190
50.4M
          -0.06435071657946274f,
191
50.4M
          -0.06435071657946266f,
192
50.4M
          0.6235485373547694f,
193
50.4M
          -0.06435071657946284f,
194
50.4M
          -0.0643507165794628f,
195
50.4M
          -0.06435071657946274f,
196
50.4M
          -0.06435071657946272f,
197
50.4M
          -0.06435071657946279f,
198
50.4M
          -0.06435071657946266f,
199
50.4M
          -0.06435071657946277f,
200
50.4M
          -0.06435071657946277f,
201
50.4M
          -0.06435071657946273f,
202
50.4M
          -0.06435071657946274f,
203
50.4M
          -0.0643507165794626f,
204
50.4M
      },
205
50.4M
      {
206
50.4M
          0.0,
207
50.4M
          0.0,
208
50.4M
          -0.4517556589999482f,
209
50.4M
          0.15854503551840063f,
210
50.4M
          0.0,
211
50.4M
          -0.04038515160822202f,
212
50.4M
          0.0074182263792423875f,
213
50.4M
          0.39351034269210167f,
214
50.4M
          -0.45175565899994635f,
215
50.4M
          0.007418226379244351f,
216
50.4M
          0.1107416575309343f,
217
50.4M
          0.08298163094882051f,
218
50.4M
          0.15854503551839705f,
219
50.4M
          0.3935103426921022f,
220
50.4M
          0.0829816309488214f,
221
50.4M
          -0.45175565899994796f,
222
50.4M
      },
223
50.4M
      {
224
50.4M
          0.0,
225
50.4M
          0.0,
226
50.4M
          -0.304684750724869f,
227
50.4M
          0.5112616136591823f,
228
50.4M
          0.0,
229
50.4M
          0.0,
230
50.4M
          -0.290480129728998f,
231
50.4M
          -0.06578701549142804f,
232
50.4M
          0.304684750724884f,
233
50.4M
          0.2904801297290076f,
234
50.4M
          0.0,
235
50.4M
          -0.23889773523344604f,
236
50.4M
          -0.5112616136592012f,
237
50.4M
          0.06578701549142545f,
238
50.4M
          0.23889773523345467f,
239
50.4M
          0.0,
240
50.4M
      },
241
50.4M
      {
242
50.4M
          0.0,
243
50.4M
          0.0,
244
50.4M
          0.3017929516615495f,
245
50.4M
          0.25792362796341184f,
246
50.4M
          0.0,
247
50.4M
          0.16272340142866204f,
248
50.4M
          0.09520022653475037f,
249
50.4M
          0.0,
250
50.4M
          0.3017929516615503f,
251
50.4M
          0.09520022653475055f,
252
50.4M
          -0.16272340142866173f,
253
50.4M
          -0.35312385449816297f,
254
50.4M
          0.25792362796341295f,
255
50.4M
          0.0,
256
50.4M
          -0.3531238544981624f,
257
50.4M
          -0.6035859033230976f,
258
50.4M
      },
259
50.4M
      {
260
50.4M
          0.0,
261
50.4M
          0.0,
262
50.4M
          0.40824829046386274f,
263
50.4M
          0.0,
264
50.4M
          0.0,
265
50.4M
          0.0,
266
50.4M
          0.0,
267
50.4M
          -0.4082482904638628f,
268
50.4M
          -0.4082482904638635f,
269
50.4M
          0.0,
270
50.4M
          0.0,
271
50.4M
          -0.40824829046386296f,
272
50.4M
          0.0,
273
50.4M
          0.4082482904638634f,
274
50.4M
          0.408248290463863f,
275
50.4M
          0.0,
276
50.4M
      },
277
50.4M
      {
278
50.4M
          0.0,
279
50.4M
          0.0,
280
50.4M
          0.1747866975480809f,
281
50.4M
          0.0812611176717539f,
282
50.4M
          0.0,
283
50.4M
          0.0,
284
50.4M
          -0.3675398009862027f,
285
50.4M
          -0.307882213957909f,
286
50.4M
          -0.17478669754808135f,
287
50.4M
          0.3675398009862011f,
288
50.4M
          0.0,
289
50.4M
          0.4826689115059883f,
290
50.4M
          -0.08126111767175039f,
291
50.4M
          0.30788221395790305f,
292
50.4M
          -0.48266891150598584f,
293
50.4M
          0.0,
294
50.4M
      },
295
50.4M
      {
296
50.4M
          0.0,
297
50.4M
          0.0,
298
50.4M
          -0.21105601049335784f,
299
50.4M
          0.18567180916109802f,
300
50.4M
          0.0,
301
50.4M
          0.0,
302
50.4M
          0.49215859013738733f,
303
50.4M
          -0.38525013709251915f,
304
50.4M
          0.21105601049335806f,
305
50.4M
          -0.49215859013738905f,
306
50.4M
          0.0,
307
50.4M
          0.17419412659916217f,
308
50.4M
          -0.18567180916109904f,
309
50.4M
          0.3852501370925211f,
310
50.4M
          -0.1741941265991621f,
311
50.4M
          0.0,
312
50.4M
      },
313
50.4M
      {
314
50.4M
          0.0,
315
50.4M
          0.0,
316
50.4M
          -0.14266084808807264f,
317
50.4M
          -0.3416446842253372f,
318
50.4M
          0.0,
319
50.4M
          0.7367497537172237f,
320
50.4M
          0.24627107722075148f,
321
50.4M
          -0.08574019035519306f,
322
50.4M
          -0.14266084808807344f,
323
50.4M
          0.24627107722075137f,
324
50.4M
          0.14883399227113567f,
325
50.4M
          -0.04768680350229251f,
326
50.4M
          -0.3416446842253373f,
327
50.4M
          -0.08574019035519267f,
328
50.4M
          -0.047686803502292804f,
329
50.4M
          -0.14266084808807242f,
330
50.4M
      },
331
50.4M
      {
332
50.4M
          0.0,
333
50.4M
          0.0,
334
50.4M
          -0.13813540350758585f,
335
50.4M
          0.3302282550303788f,
336
50.4M
          0.0,
337
50.4M
          0.08755115000587084f,
338
50.4M
          -0.07946706605909573f,
339
50.4M
          -0.4613374887461511f,
340
50.4M
          -0.13813540350758294f,
341
50.4M
          -0.07946706605910261f,
342
50.4M
          0.49724647109535086f,
343
50.4M
          0.12538059448563663f,
344
50.4M
          0.3302282550303805f,
345
50.4M
          -0.4613374887461554f,
346
50.4M
          0.12538059448564315f,
347
50.4M
          -0.13813540350758452f,
348
50.4M
      },
349
50.4M
      {
350
50.4M
          0.0,
351
50.4M
          0.0,
352
50.4M
          -0.17437602599651067f,
353
50.4M
          0.0702790691196284f,
354
50.4M
          0.0,
355
50.4M
          -0.2921026642334881f,
356
50.4M
          0.3623817333531167f,
357
50.4M
          0.0,
358
50.4M
          -0.1743760259965108f,
359
50.4M
          0.36238173335311646f,
360
50.4M
          0.29210266423348785f,
361
50.4M
          -0.4326608024727445f,
362
50.4M
          0.07027906911962818f,
363
50.4M
          0.0,
364
50.4M
          -0.4326608024727457f,
365
50.4M
          0.34875205199302267f,
366
50.4M
      },
367
50.4M
      {
368
50.4M
          0.0,
369
50.4M
          0.0,
370
50.4M
          0.11354987314994337f,
371
50.4M
          -0.07417504595810355f,
372
50.4M
          0.0,
373
50.4M
          0.19402893032594343f,
374
50.4M
          -0.435190496523228f,
375
50.4M
          0.21918684838857466f,
376
50.4M
          0.11354987314994257f,
377
50.4M
          -0.4351904965232251f,
378
50.4M
          0.5550443808910661f,
379
50.4M
          -0.25468277124066463f,
380
50.4M
          -0.07417504595810233f,
381
50.4M
          0.2191868483885728f,
382
50.4M
          -0.25468277124066413f,
383
50.4M
          0.1135498731499429f,
384
50.4M
      },
385
50.4M
  };
386
387
50.4M
  const HWY_CAPPED(float, 16) d;
388
151M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
100M
    auto pixel = Zero(d);
390
1.71G
    for (size_t j = 0; j < 16; j++) {
391
1.61G
      auto cf = Set(d, coeffs[j]);
392
1.61G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.61G
      pixel = MulAdd(cf, basis, pixel);
394
1.61G
    }
395
100M
    Store(pixel, d, pixels + i);
396
100M
  }
397
50.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
49.5M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
49.5M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
49.5M
      {
98
49.5M
          0.25,
99
49.5M
          0.25,
100
49.5M
          0.25,
101
49.5M
          0.25,
102
49.5M
          0.25,
103
49.5M
          0.25,
104
49.5M
          0.25,
105
49.5M
          0.25,
106
49.5M
          0.25,
107
49.5M
          0.25,
108
49.5M
          0.25,
109
49.5M
          0.25,
110
49.5M
          0.25,
111
49.5M
          0.25,
112
49.5M
          0.25,
113
49.5M
          0.25,
114
49.5M
      },
115
49.5M
      {
116
49.5M
          0.876902929799142f,
117
49.5M
          0.2206518106944235f,
118
49.5M
          -0.10140050393753763f,
119
49.5M
          -0.1014005039375375f,
120
49.5M
          0.2206518106944236f,
121
49.5M
          -0.10140050393753777f,
122
49.5M
          -0.10140050393753772f,
123
49.5M
          -0.10140050393753763f,
124
49.5M
          -0.10140050393753758f,
125
49.5M
          -0.10140050393753769f,
126
49.5M
          -0.1014005039375375f,
127
49.5M
          -0.10140050393753768f,
128
49.5M
          -0.10140050393753768f,
129
49.5M
          -0.10140050393753759f,
130
49.5M
          -0.10140050393753763f,
131
49.5M
          -0.10140050393753741f,
132
49.5M
      },
133
49.5M
      {
134
49.5M
          0.0,
135
49.5M
          0.0,
136
49.5M
          0.40670075830260755f,
137
49.5M
          0.44444816619734445f,
138
49.5M
          0.0,
139
49.5M
          0.0,
140
49.5M
          0.19574399372042936f,
141
49.5M
          0.2929100136981264f,
142
49.5M
          -0.40670075830260716f,
143
49.5M
          -0.19574399372042872f,
144
49.5M
          0.0,
145
49.5M
          0.11379074460448091f,
146
49.5M
          -0.44444816619734384f,
147
49.5M
          -0.29291001369812636f,
148
49.5M
          -0.1137907446044814f,
149
49.5M
          0.0,
150
49.5M
      },
151
49.5M
      {
152
49.5M
          0.0,
153
49.5M
          0.0,
154
49.5M
          -0.21255748058288748f,
155
49.5M
          0.3085497062849767f,
156
49.5M
          0.0,
157
49.5M
          0.4706702258572536f,
158
49.5M
          -0.1621205195722993f,
159
49.5M
          0.0,
160
49.5M
          -0.21255748058287047f,
161
49.5M
          -0.16212051957228327f,
162
49.5M
          -0.47067022585725277f,
163
49.5M
          -0.1464291867126764f,
164
49.5M
          0.3085497062849487f,
165
49.5M
          0.0,
166
49.5M
          -0.14642918671266536f,
167
49.5M
          0.4251149611657548f,
168
49.5M
      },
169
49.5M
      {
170
49.5M
          0.0,
171
49.5M
          -0.7071067811865474f,
172
49.5M
          0.0,
173
49.5M
          0.0,
174
49.5M
          0.7071067811865476f,
175
49.5M
          0.0,
176
49.5M
          0.0,
177
49.5M
          0.0,
178
49.5M
          0.0,
179
49.5M
          0.0,
180
49.5M
          0.0,
181
49.5M
          0.0,
182
49.5M
          0.0,
183
49.5M
          0.0,
184
49.5M
          0.0,
185
49.5M
          0.0,
186
49.5M
      },
187
49.5M
      {
188
49.5M
          -0.4105377591765233f,
189
49.5M
          0.6235485373547691f,
190
49.5M
          -0.06435071657946274f,
191
49.5M
          -0.06435071657946266f,
192
49.5M
          0.6235485373547694f,
193
49.5M
          -0.06435071657946284f,
194
49.5M
          -0.0643507165794628f,
195
49.5M
          -0.06435071657946274f,
196
49.5M
          -0.06435071657946272f,
197
49.5M
          -0.06435071657946279f,
198
49.5M
          -0.06435071657946266f,
199
49.5M
          -0.06435071657946277f,
200
49.5M
          -0.06435071657946277f,
201
49.5M
          -0.06435071657946273f,
202
49.5M
          -0.06435071657946274f,
203
49.5M
          -0.0643507165794626f,
204
49.5M
      },
205
49.5M
      {
206
49.5M
          0.0,
207
49.5M
          0.0,
208
49.5M
          -0.4517556589999482f,
209
49.5M
          0.15854503551840063f,
210
49.5M
          0.0,
211
49.5M
          -0.04038515160822202f,
212
49.5M
          0.0074182263792423875f,
213
49.5M
          0.39351034269210167f,
214
49.5M
          -0.45175565899994635f,
215
49.5M
          0.007418226379244351f,
216
49.5M
          0.1107416575309343f,
217
49.5M
          0.08298163094882051f,
218
49.5M
          0.15854503551839705f,
219
49.5M
          0.3935103426921022f,
220
49.5M
          0.0829816309488214f,
221
49.5M
          -0.45175565899994796f,
222
49.5M
      },
223
49.5M
      {
224
49.5M
          0.0,
225
49.5M
          0.0,
226
49.5M
          -0.304684750724869f,
227
49.5M
          0.5112616136591823f,
228
49.5M
          0.0,
229
49.5M
          0.0,
230
49.5M
          -0.290480129728998f,
231
49.5M
          -0.06578701549142804f,
232
49.5M
          0.304684750724884f,
233
49.5M
          0.2904801297290076f,
234
49.5M
          0.0,
235
49.5M
          -0.23889773523344604f,
236
49.5M
          -0.5112616136592012f,
237
49.5M
          0.06578701549142545f,
238
49.5M
          0.23889773523345467f,
239
49.5M
          0.0,
240
49.5M
      },
241
49.5M
      {
242
49.5M
          0.0,
243
49.5M
          0.0,
244
49.5M
          0.3017929516615495f,
245
49.5M
          0.25792362796341184f,
246
49.5M
          0.0,
247
49.5M
          0.16272340142866204f,
248
49.5M
          0.09520022653475037f,
249
49.5M
          0.0,
250
49.5M
          0.3017929516615503f,
251
49.5M
          0.09520022653475055f,
252
49.5M
          -0.16272340142866173f,
253
49.5M
          -0.35312385449816297f,
254
49.5M
          0.25792362796341295f,
255
49.5M
          0.0,
256
49.5M
          -0.3531238544981624f,
257
49.5M
          -0.6035859033230976f,
258
49.5M
      },
259
49.5M
      {
260
49.5M
          0.0,
261
49.5M
          0.0,
262
49.5M
          0.40824829046386274f,
263
49.5M
          0.0,
264
49.5M
          0.0,
265
49.5M
          0.0,
266
49.5M
          0.0,
267
49.5M
          -0.4082482904638628f,
268
49.5M
          -0.4082482904638635f,
269
49.5M
          0.0,
270
49.5M
          0.0,
271
49.5M
          -0.40824829046386296f,
272
49.5M
          0.0,
273
49.5M
          0.4082482904638634f,
274
49.5M
          0.408248290463863f,
275
49.5M
          0.0,
276
49.5M
      },
277
49.5M
      {
278
49.5M
          0.0,
279
49.5M
          0.0,
280
49.5M
          0.1747866975480809f,
281
49.5M
          0.0812611176717539f,
282
49.5M
          0.0,
283
49.5M
          0.0,
284
49.5M
          -0.3675398009862027f,
285
49.5M
          -0.307882213957909f,
286
49.5M
          -0.17478669754808135f,
287
49.5M
          0.3675398009862011f,
288
49.5M
          0.0,
289
49.5M
          0.4826689115059883f,
290
49.5M
          -0.08126111767175039f,
291
49.5M
          0.30788221395790305f,
292
49.5M
          -0.48266891150598584f,
293
49.5M
          0.0,
294
49.5M
      },
295
49.5M
      {
296
49.5M
          0.0,
297
49.5M
          0.0,
298
49.5M
          -0.21105601049335784f,
299
49.5M
          0.18567180916109802f,
300
49.5M
          0.0,
301
49.5M
          0.0,
302
49.5M
          0.49215859013738733f,
303
49.5M
          -0.38525013709251915f,
304
49.5M
          0.21105601049335806f,
305
49.5M
          -0.49215859013738905f,
306
49.5M
          0.0,
307
49.5M
          0.17419412659916217f,
308
49.5M
          -0.18567180916109904f,
309
49.5M
          0.3852501370925211f,
310
49.5M
          -0.1741941265991621f,
311
49.5M
          0.0,
312
49.5M
      },
313
49.5M
      {
314
49.5M
          0.0,
315
49.5M
          0.0,
316
49.5M
          -0.14266084808807264f,
317
49.5M
          -0.3416446842253372f,
318
49.5M
          0.0,
319
49.5M
          0.7367497537172237f,
320
49.5M
          0.24627107722075148f,
321
49.5M
          -0.08574019035519306f,
322
49.5M
          -0.14266084808807344f,
323
49.5M
          0.24627107722075137f,
324
49.5M
          0.14883399227113567f,
325
49.5M
          -0.04768680350229251f,
326
49.5M
          -0.3416446842253373f,
327
49.5M
          -0.08574019035519267f,
328
49.5M
          -0.047686803502292804f,
329
49.5M
          -0.14266084808807242f,
330
49.5M
      },
331
49.5M
      {
332
49.5M
          0.0,
333
49.5M
          0.0,
334
49.5M
          -0.13813540350758585f,
335
49.5M
          0.3302282550303788f,
336
49.5M
          0.0,
337
49.5M
          0.08755115000587084f,
338
49.5M
          -0.07946706605909573f,
339
49.5M
          -0.4613374887461511f,
340
49.5M
          -0.13813540350758294f,
341
49.5M
          -0.07946706605910261f,
342
49.5M
          0.49724647109535086f,
343
49.5M
          0.12538059448563663f,
344
49.5M
          0.3302282550303805f,
345
49.5M
          -0.4613374887461554f,
346
49.5M
          0.12538059448564315f,
347
49.5M
          -0.13813540350758452f,
348
49.5M
      },
349
49.5M
      {
350
49.5M
          0.0,
351
49.5M
          0.0,
352
49.5M
          -0.17437602599651067f,
353
49.5M
          0.0702790691196284f,
354
49.5M
          0.0,
355
49.5M
          -0.2921026642334881f,
356
49.5M
          0.3623817333531167f,
357
49.5M
          0.0,
358
49.5M
          -0.1743760259965108f,
359
49.5M
          0.36238173335311646f,
360
49.5M
          0.29210266423348785f,
361
49.5M
          -0.4326608024727445f,
362
49.5M
          0.07027906911962818f,
363
49.5M
          0.0,
364
49.5M
          -0.4326608024727457f,
365
49.5M
          0.34875205199302267f,
366
49.5M
      },
367
49.5M
      {
368
49.5M
          0.0,
369
49.5M
          0.0,
370
49.5M
          0.11354987314994337f,
371
49.5M
          -0.07417504595810355f,
372
49.5M
          0.0,
373
49.5M
          0.19402893032594343f,
374
49.5M
          -0.435190496523228f,
375
49.5M
          0.21918684838857466f,
376
49.5M
          0.11354987314994257f,
377
49.5M
          -0.4351904965232251f,
378
49.5M
          0.5550443808910661f,
379
49.5M
          -0.25468277124066463f,
380
49.5M
          -0.07417504595810233f,
381
49.5M
          0.2191868483885728f,
382
49.5M
          -0.25468277124066413f,
383
49.5M
          0.1135498731499429f,
384
49.5M
      },
385
49.5M
  };
386
387
49.5M
  const HWY_CAPPED(float, 16) d;
388
148M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
99.1M
    auto pixel = Zero(d);
390
1.68G
    for (size_t j = 0; j < 16; j++) {
391
1.58G
      auto cf = Set(d, coeffs[j]);
392
1.58G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.58G
      pixel = MulAdd(cf, basis, pixel);
394
1.58G
    }
395
99.1M
    Store(pixel, d, pixels + i);
396
99.1M
  }
397
49.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
936k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
936k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
936k
      {
98
936k
          0.25,
99
936k
          0.25,
100
936k
          0.25,
101
936k
          0.25,
102
936k
          0.25,
103
936k
          0.25,
104
936k
          0.25,
105
936k
          0.25,
106
936k
          0.25,
107
936k
          0.25,
108
936k
          0.25,
109
936k
          0.25,
110
936k
          0.25,
111
936k
          0.25,
112
936k
          0.25,
113
936k
          0.25,
114
936k
      },
115
936k
      {
116
936k
          0.876902929799142f,
117
936k
          0.2206518106944235f,
118
936k
          -0.10140050393753763f,
119
936k
          -0.1014005039375375f,
120
936k
          0.2206518106944236f,
121
936k
          -0.10140050393753777f,
122
936k
          -0.10140050393753772f,
123
936k
          -0.10140050393753763f,
124
936k
          -0.10140050393753758f,
125
936k
          -0.10140050393753769f,
126
936k
          -0.1014005039375375f,
127
936k
          -0.10140050393753768f,
128
936k
          -0.10140050393753768f,
129
936k
          -0.10140050393753759f,
130
936k
          -0.10140050393753763f,
131
936k
          -0.10140050393753741f,
132
936k
      },
133
936k
      {
134
936k
          0.0,
135
936k
          0.0,
136
936k
          0.40670075830260755f,
137
936k
          0.44444816619734445f,
138
936k
          0.0,
139
936k
          0.0,
140
936k
          0.19574399372042936f,
141
936k
          0.2929100136981264f,
142
936k
          -0.40670075830260716f,
143
936k
          -0.19574399372042872f,
144
936k
          0.0,
145
936k
          0.11379074460448091f,
146
936k
          -0.44444816619734384f,
147
936k
          -0.29291001369812636f,
148
936k
          -0.1137907446044814f,
149
936k
          0.0,
150
936k
      },
151
936k
      {
152
936k
          0.0,
153
936k
          0.0,
154
936k
          -0.21255748058288748f,
155
936k
          0.3085497062849767f,
156
936k
          0.0,
157
936k
          0.4706702258572536f,
158
936k
          -0.1621205195722993f,
159
936k
          0.0,
160
936k
          -0.21255748058287047f,
161
936k
          -0.16212051957228327f,
162
936k
          -0.47067022585725277f,
163
936k
          -0.1464291867126764f,
164
936k
          0.3085497062849487f,
165
936k
          0.0,
166
936k
          -0.14642918671266536f,
167
936k
          0.4251149611657548f,
168
936k
      },
169
936k
      {
170
936k
          0.0,
171
936k
          -0.7071067811865474f,
172
936k
          0.0,
173
936k
          0.0,
174
936k
          0.7071067811865476f,
175
936k
          0.0,
176
936k
          0.0,
177
936k
          0.0,
178
936k
          0.0,
179
936k
          0.0,
180
936k
          0.0,
181
936k
          0.0,
182
936k
          0.0,
183
936k
          0.0,
184
936k
          0.0,
185
936k
          0.0,
186
936k
      },
187
936k
      {
188
936k
          -0.4105377591765233f,
189
936k
          0.6235485373547691f,
190
936k
          -0.06435071657946274f,
191
936k
          -0.06435071657946266f,
192
936k
          0.6235485373547694f,
193
936k
          -0.06435071657946284f,
194
936k
          -0.0643507165794628f,
195
936k
          -0.06435071657946274f,
196
936k
          -0.06435071657946272f,
197
936k
          -0.06435071657946279f,
198
936k
          -0.06435071657946266f,
199
936k
          -0.06435071657946277f,
200
936k
          -0.06435071657946277f,
201
936k
          -0.06435071657946273f,
202
936k
          -0.06435071657946274f,
203
936k
          -0.0643507165794626f,
204
936k
      },
205
936k
      {
206
936k
          0.0,
207
936k
          0.0,
208
936k
          -0.4517556589999482f,
209
936k
          0.15854503551840063f,
210
936k
          0.0,
211
936k
          -0.04038515160822202f,
212
936k
          0.0074182263792423875f,
213
936k
          0.39351034269210167f,
214
936k
          -0.45175565899994635f,
215
936k
          0.007418226379244351f,
216
936k
          0.1107416575309343f,
217
936k
          0.08298163094882051f,
218
936k
          0.15854503551839705f,
219
936k
          0.3935103426921022f,
220
936k
          0.0829816309488214f,
221
936k
          -0.45175565899994796f,
222
936k
      },
223
936k
      {
224
936k
          0.0,
225
936k
          0.0,
226
936k
          -0.304684750724869f,
227
936k
          0.5112616136591823f,
228
936k
          0.0,
229
936k
          0.0,
230
936k
          -0.290480129728998f,
231
936k
          -0.06578701549142804f,
232
936k
          0.304684750724884f,
233
936k
          0.2904801297290076f,
234
936k
          0.0,
235
936k
          -0.23889773523344604f,
236
936k
          -0.5112616136592012f,
237
936k
          0.06578701549142545f,
238
936k
          0.23889773523345467f,
239
936k
          0.0,
240
936k
      },
241
936k
      {
242
936k
          0.0,
243
936k
          0.0,
244
936k
          0.3017929516615495f,
245
936k
          0.25792362796341184f,
246
936k
          0.0,
247
936k
          0.16272340142866204f,
248
936k
          0.09520022653475037f,
249
936k
          0.0,
250
936k
          0.3017929516615503f,
251
936k
          0.09520022653475055f,
252
936k
          -0.16272340142866173f,
253
936k
          -0.35312385449816297f,
254
936k
          0.25792362796341295f,
255
936k
          0.0,
256
936k
          -0.3531238544981624f,
257
936k
          -0.6035859033230976f,
258
936k
      },
259
936k
      {
260
936k
          0.0,
261
936k
          0.0,
262
936k
          0.40824829046386274f,
263
936k
          0.0,
264
936k
          0.0,
265
936k
          0.0,
266
936k
          0.0,
267
936k
          -0.4082482904638628f,
268
936k
          -0.4082482904638635f,
269
936k
          0.0,
270
936k
          0.0,
271
936k
          -0.40824829046386296f,
272
936k
          0.0,
273
936k
          0.4082482904638634f,
274
936k
          0.408248290463863f,
275
936k
          0.0,
276
936k
      },
277
936k
      {
278
936k
          0.0,
279
936k
          0.0,
280
936k
          0.1747866975480809f,
281
936k
          0.0812611176717539f,
282
936k
          0.0,
283
936k
          0.0,
284
936k
          -0.3675398009862027f,
285
936k
          -0.307882213957909f,
286
936k
          -0.17478669754808135f,
287
936k
          0.3675398009862011f,
288
936k
          0.0,
289
936k
          0.4826689115059883f,
290
936k
          -0.08126111767175039f,
291
936k
          0.30788221395790305f,
292
936k
          -0.48266891150598584f,
293
936k
          0.0,
294
936k
      },
295
936k
      {
296
936k
          0.0,
297
936k
          0.0,
298
936k
          -0.21105601049335784f,
299
936k
          0.18567180916109802f,
300
936k
          0.0,
301
936k
          0.0,
302
936k
          0.49215859013738733f,
303
936k
          -0.38525013709251915f,
304
936k
          0.21105601049335806f,
305
936k
          -0.49215859013738905f,
306
936k
          0.0,
307
936k
          0.17419412659916217f,
308
936k
          -0.18567180916109904f,
309
936k
          0.3852501370925211f,
310
936k
          -0.1741941265991621f,
311
936k
          0.0,
312
936k
      },
313
936k
      {
314
936k
          0.0,
315
936k
          0.0,
316
936k
          -0.14266084808807264f,
317
936k
          -0.3416446842253372f,
318
936k
          0.0,
319
936k
          0.7367497537172237f,
320
936k
          0.24627107722075148f,
321
936k
          -0.08574019035519306f,
322
936k
          -0.14266084808807344f,
323
936k
          0.24627107722075137f,
324
936k
          0.14883399227113567f,
325
936k
          -0.04768680350229251f,
326
936k
          -0.3416446842253373f,
327
936k
          -0.08574019035519267f,
328
936k
          -0.047686803502292804f,
329
936k
          -0.14266084808807242f,
330
936k
      },
331
936k
      {
332
936k
          0.0,
333
936k
          0.0,
334
936k
          -0.13813540350758585f,
335
936k
          0.3302282550303788f,
336
936k
          0.0,
337
936k
          0.08755115000587084f,
338
936k
          -0.07946706605909573f,
339
936k
          -0.4613374887461511f,
340
936k
          -0.13813540350758294f,
341
936k
          -0.07946706605910261f,
342
936k
          0.49724647109535086f,
343
936k
          0.12538059448563663f,
344
936k
          0.3302282550303805f,
345
936k
          -0.4613374887461554f,
346
936k
          0.12538059448564315f,
347
936k
          -0.13813540350758452f,
348
936k
      },
349
936k
      {
350
936k
          0.0,
351
936k
          0.0,
352
936k
          -0.17437602599651067f,
353
936k
          0.0702790691196284f,
354
936k
          0.0,
355
936k
          -0.2921026642334881f,
356
936k
          0.3623817333531167f,
357
936k
          0.0,
358
936k
          -0.1743760259965108f,
359
936k
          0.36238173335311646f,
360
936k
          0.29210266423348785f,
361
936k
          -0.4326608024727445f,
362
936k
          0.07027906911962818f,
363
936k
          0.0,
364
936k
          -0.4326608024727457f,
365
936k
          0.34875205199302267f,
366
936k
      },
367
936k
      {
368
936k
          0.0,
369
936k
          0.0,
370
936k
          0.11354987314994337f,
371
936k
          -0.07417504595810355f,
372
936k
          0.0,
373
936k
          0.19402893032594343f,
374
936k
          -0.435190496523228f,
375
936k
          0.21918684838857466f,
376
936k
          0.11354987314994257f,
377
936k
          -0.4351904965232251f,
378
936k
          0.5550443808910661f,
379
936k
          -0.25468277124066463f,
380
936k
          -0.07417504595810233f,
381
936k
          0.2191868483885728f,
382
936k
          -0.25468277124066413f,
383
936k
          0.1135498731499429f,
384
936k
      },
385
936k
  };
386
387
936k
  const HWY_CAPPED(float, 16) d;
388
2.80M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.87M
    auto pixel = Zero(d);
390
31.8M
    for (size_t j = 0; j < 16; j++) {
391
29.9M
      auto cf = Set(d, coeffs[j]);
392
29.9M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
29.9M
      pixel = MulAdd(cf, basis, pixel);
394
29.9M
    }
395
1.87M
    Store(pixel, d, pixels + i);
396
1.87M
  }
397
936k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
50.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
50.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
50.4M
  size_t afv_x = afv_kind & 1;
404
50.4M
  size_t afv_y = afv_kind / 2;
405
50.4M
  float dcs[3] = {};
406
50.4M
  float block00 = coefficients[0];
407
50.4M
  float block01 = coefficients[1];
408
50.4M
  float block10 = coefficients[8];
409
50.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
50.4M
  dcs[1] = (block00 + block10 - block01);
411
50.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
50.4M
  HWY_ALIGN float coeff[4 * 4];
414
50.4M
  coeff[0] = dcs[0];
415
252M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.00G
    for (size_t ix = 0; ix < 4; ix++) {
417
807M
      if (ix == 0 && iy == 0) continue;
418
757M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
757M
    }
420
201M
  }
421
50.4M
  HWY_ALIGN float block[4 * 8];
422
50.4M
  AFVIDCT4x4(coeff, block);
423
252M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.00G
    for (size_t ix = 0; ix < 4; ix++) {
425
807M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
807M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
807M
    }
428
201M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
50.4M
  block[0] = dcs[1];
431
252M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.00G
    for (size_t ix = 0; ix < 4; ix++) {
433
807M
      if (ix == 0 && iy == 0) continue;
434
757M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
757M
    }
436
201M
  }
437
50.4M
  ComputeScaledIDCT<4, 4>()(
438
50.4M
      block,
439
50.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
50.4M
            pixels_stride),
441
50.4M
      scratch_space);
442
  // IDCT4x8.
443
50.4M
  block[0] = dcs[2];
444
252M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.81G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.61G
      if (ix == 0 && iy == 0) continue;
447
1.56G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.56G
    }
449
201M
  }
450
50.4M
  ComputeScaledIDCT<4, 8>()(
451
50.4M
      block,
452
50.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
50.4M
      scratch_space);
454
50.4M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
12.3M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
12.3M
  size_t afv_x = afv_kind & 1;
404
12.3M
  size_t afv_y = afv_kind / 2;
405
12.3M
  float dcs[3] = {};
406
12.3M
  float block00 = coefficients[0];
407
12.3M
  float block01 = coefficients[1];
408
12.3M
  float block10 = coefficients[8];
409
12.3M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
12.3M
  dcs[1] = (block00 + block10 - block01);
411
12.3M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
12.3M
  HWY_ALIGN float coeff[4 * 4];
414
12.3M
  coeff[0] = dcs[0];
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      if (ix == 0 && iy == 0) continue;
418
185M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
185M
    }
420
49.5M
  }
421
12.3M
  HWY_ALIGN float block[4 * 8];
422
12.3M
  AFVIDCT4x4(coeff, block);
423
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
424
247M
    for (size_t ix = 0; ix < 4; ix++) {
425
198M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
198M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
198M
    }
428
49.5M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
12.3M
  block[0] = dcs[1];
431
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
432
247M
    for (size_t ix = 0; ix < 4; ix++) {
433
198M
      if (ix == 0 && iy == 0) continue;
434
185M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
185M
    }
436
49.5M
  }
437
12.3M
  ComputeScaledIDCT<4, 4>()(
438
12.3M
      block,
439
12.3M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
12.3M
            pixels_stride),
441
12.3M
      scratch_space);
442
  // IDCT4x8.
443
12.3M
  block[0] = dcs[2];
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      if (ix == 0 && iy == 0) continue;
447
384M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
384M
    }
449
49.5M
  }
450
12.3M
  ComputeScaledIDCT<4, 8>()(
451
12.3M
      block,
452
12.3M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
12.3M
      scratch_space);
454
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
12.3M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
12.3M
  size_t afv_x = afv_kind & 1;
404
12.3M
  size_t afv_y = afv_kind / 2;
405
12.3M
  float dcs[3] = {};
406
12.3M
  float block00 = coefficients[0];
407
12.3M
  float block01 = coefficients[1];
408
12.3M
  float block10 = coefficients[8];
409
12.3M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
12.3M
  dcs[1] = (block00 + block10 - block01);
411
12.3M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
12.3M
  HWY_ALIGN float coeff[4 * 4];
414
12.3M
  coeff[0] = dcs[0];
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      if (ix == 0 && iy == 0) continue;
418
185M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
185M
    }
420
49.5M
  }
421
12.3M
  HWY_ALIGN float block[4 * 8];
422
12.3M
  AFVIDCT4x4(coeff, block);
423
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
424
247M
    for (size_t ix = 0; ix < 4; ix++) {
425
198M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
198M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
198M
    }
428
49.5M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
12.3M
  block[0] = dcs[1];
431
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
432
247M
    for (size_t ix = 0; ix < 4; ix++) {
433
198M
      if (ix == 0 && iy == 0) continue;
434
185M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
185M
    }
436
49.5M
  }
437
12.3M
  ComputeScaledIDCT<4, 4>()(
438
12.3M
      block,
439
12.3M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
12.3M
            pixels_stride),
441
12.3M
      scratch_space);
442
  // IDCT4x8.
443
12.3M
  block[0] = dcs[2];
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      if (ix == 0 && iy == 0) continue;
447
384M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
384M
    }
449
49.5M
  }
450
12.3M
  ComputeScaledIDCT<4, 8>()(
451
12.3M
      block,
452
12.3M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
12.3M
      scratch_space);
454
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
12.3M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
12.3M
  size_t afv_x = afv_kind & 1;
404
12.3M
  size_t afv_y = afv_kind / 2;
405
12.3M
  float dcs[3] = {};
406
12.3M
  float block00 = coefficients[0];
407
12.3M
  float block01 = coefficients[1];
408
12.3M
  float block10 = coefficients[8];
409
12.3M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
12.3M
  dcs[1] = (block00 + block10 - block01);
411
12.3M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
12.3M
  HWY_ALIGN float coeff[4 * 4];
414
12.3M
  coeff[0] = dcs[0];
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      if (ix == 0 && iy == 0) continue;
418
185M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
185M
    }
420
49.5M
  }
421
12.3M
  HWY_ALIGN float block[4 * 8];
422
12.3M
  AFVIDCT4x4(coeff, block);
423
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
424
247M
    for (size_t ix = 0; ix < 4; ix++) {
425
198M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
198M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
198M
    }
428
49.5M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
12.3M
  block[0] = dcs[1];
431
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
432
247M
    for (size_t ix = 0; ix < 4; ix++) {
433
198M
      if (ix == 0 && iy == 0) continue;
434
185M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
185M
    }
436
49.5M
  }
437
12.3M
  ComputeScaledIDCT<4, 4>()(
438
12.3M
      block,
439
12.3M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
12.3M
            pixels_stride),
441
12.3M
      scratch_space);
442
  // IDCT4x8.
443
12.3M
  block[0] = dcs[2];
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      if (ix == 0 && iy == 0) continue;
447
384M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
384M
    }
449
49.5M
  }
450
12.3M
  ComputeScaledIDCT<4, 8>()(
451
12.3M
      block,
452
12.3M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
12.3M
      scratch_space);
454
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
12.3M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
12.3M
  size_t afv_x = afv_kind & 1;
404
12.3M
  size_t afv_y = afv_kind / 2;
405
12.3M
  float dcs[3] = {};
406
12.3M
  float block00 = coefficients[0];
407
12.3M
  float block01 = coefficients[1];
408
12.3M
  float block10 = coefficients[8];
409
12.3M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
12.3M
  dcs[1] = (block00 + block10 - block01);
411
12.3M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
12.3M
  HWY_ALIGN float coeff[4 * 4];
414
12.3M
  coeff[0] = dcs[0];
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      if (ix == 0 && iy == 0) continue;
418
185M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
185M
    }
420
49.5M
  }
421
12.3M
  HWY_ALIGN float block[4 * 8];
422
12.3M
  AFVIDCT4x4(coeff, block);
423
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
424
247M
    for (size_t ix = 0; ix < 4; ix++) {
425
198M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
198M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
198M
    }
428
49.5M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
12.3M
  block[0] = dcs[1];
431
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
432
247M
    for (size_t ix = 0; ix < 4; ix++) {
433
198M
      if (ix == 0 && iy == 0) continue;
434
185M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
185M
    }
436
49.5M
  }
437
12.3M
  ComputeScaledIDCT<4, 4>()(
438
12.3M
      block,
439
12.3M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
12.3M
            pixels_stride),
441
12.3M
      scratch_space);
442
  // IDCT4x8.
443
12.3M
  block[0] = dcs[2];
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      if (ix == 0 && iy == 0) continue;
447
384M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
384M
    }
449
49.5M
  }
450
12.3M
  ComputeScaledIDCT<4, 8>()(
451
12.3M
      block,
452
12.3M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
12.3M
      scratch_space);
454
12.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
313k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
313k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
313k
  size_t afv_x = afv_kind & 1;
404
313k
  size_t afv_y = afv_kind / 2;
405
313k
  float dcs[3] = {};
406
313k
  float block00 = coefficients[0];
407
313k
  float block01 = coefficients[1];
408
313k
  float block10 = coefficients[8];
409
313k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
313k
  dcs[1] = (block00 + block10 - block01);
411
313k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
313k
  HWY_ALIGN float coeff[4 * 4];
414
313k
  coeff[0] = dcs[0];
415
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.27M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.01M
      if (ix == 0 && iy == 0) continue;
418
4.70M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.70M
    }
420
1.25M
  }
421
313k
  HWY_ALIGN float block[4 * 8];
422
313k
  AFVIDCT4x4(coeff, block);
423
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
424
6.27M
    for (size_t ix = 0; ix < 4; ix++) {
425
5.01M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
5.01M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
5.01M
    }
428
1.25M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
313k
  block[0] = dcs[1];
431
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
432
6.27M
    for (size_t ix = 0; ix < 4; ix++) {
433
5.01M
      if (ix == 0 && iy == 0) continue;
434
4.70M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.70M
    }
436
1.25M
  }
437
313k
  ComputeScaledIDCT<4, 4>()(
438
313k
      block,
439
313k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
313k
            pixels_stride),
441
313k
      scratch_space);
442
  // IDCT4x8.
443
313k
  block[0] = dcs[2];
444
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.0M
      if (ix == 0 && iy == 0) continue;
447
9.72M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
9.72M
    }
449
1.25M
  }
450
313k
  ComputeScaledIDCT<4, 8>()(
451
313k
      block,
452
313k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
313k
      scratch_space);
454
313k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
172k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
172k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
172k
  size_t afv_x = afv_kind & 1;
404
172k
  size_t afv_y = afv_kind / 2;
405
172k
  float dcs[3] = {};
406
172k
  float block00 = coefficients[0];
407
172k
  float block01 = coefficients[1];
408
172k
  float block10 = coefficients[8];
409
172k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
172k
  dcs[1] = (block00 + block10 - block01);
411
172k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
172k
  HWY_ALIGN float coeff[4 * 4];
414
172k
  coeff[0] = dcs[0];
415
864k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.45M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.76M
      if (ix == 0 && iy == 0) continue;
418
2.59M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.59M
    }
420
691k
  }
421
172k
  HWY_ALIGN float block[4 * 8];
422
172k
  AFVIDCT4x4(coeff, block);
423
864k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.45M
    for (size_t ix = 0; ix < 4; ix++) {
425
2.76M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
2.76M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
2.76M
    }
428
691k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
172k
  block[0] = dcs[1];
431
864k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.45M
    for (size_t ix = 0; ix < 4; ix++) {
433
2.76M
      if (ix == 0 && iy == 0) continue;
434
2.59M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.59M
    }
436
691k
  }
437
172k
  ComputeScaledIDCT<4, 4>()(
438
172k
      block,
439
172k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
172k
            pixels_stride),
441
172k
      scratch_space);
442
  // IDCT4x8.
443
172k
  block[0] = dcs[2];
444
864k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.22M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.53M
      if (ix == 0 && iy == 0) continue;
447
5.35M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
5.35M
    }
449
691k
  }
450
172k
  ComputeScaledIDCT<4, 8>()(
451
172k
      block,
452
172k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
172k
      scratch_space);
454
172k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
219k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
219k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
219k
  size_t afv_x = afv_kind & 1;
404
219k
  size_t afv_y = afv_kind / 2;
405
219k
  float dcs[3] = {};
406
219k
  float block00 = coefficients[0];
407
219k
  float block01 = coefficients[1];
408
219k
  float block10 = coefficients[8];
409
219k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
219k
  dcs[1] = (block00 + block10 - block01);
411
219k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
219k
  HWY_ALIGN float coeff[4 * 4];
414
219k
  coeff[0] = dcs[0];
415
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.50M
      if (ix == 0 && iy == 0) continue;
418
3.28M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.28M
    }
420
876k
  }
421
219k
  HWY_ALIGN float block[4 * 8];
422
219k
  AFVIDCT4x4(coeff, block);
423
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.50M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.50M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.50M
    }
428
876k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
219k
  block[0] = dcs[1];
431
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.50M
      if (ix == 0 && iy == 0) continue;
434
3.28M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.28M
    }
436
876k
  }
437
219k
  ComputeScaledIDCT<4, 4>()(
438
219k
      block,
439
219k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
219k
            pixels_stride),
441
219k
      scratch_space);
442
  // IDCT4x8.
443
219k
  block[0] = dcs[2];
444
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.88M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.01M
      if (ix == 0 && iy == 0) continue;
447
6.79M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.79M
    }
449
876k
  }
450
219k
  ComputeScaledIDCT<4, 8>()(
451
219k
      block,
452
219k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
219k
      scratch_space);
454
219k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
230k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
230k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
230k
  size_t afv_x = afv_kind & 1;
404
230k
  size_t afv_y = afv_kind / 2;
405
230k
  float dcs[3] = {};
406
230k
  float block00 = coefficients[0];
407
230k
  float block01 = coefficients[1];
408
230k
  float block10 = coefficients[8];
409
230k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
230k
  dcs[1] = (block00 + block10 - block01);
411
230k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
230k
  HWY_ALIGN float coeff[4 * 4];
414
230k
  coeff[0] = dcs[0];
415
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.61M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.68M
      if (ix == 0 && iy == 0) continue;
418
3.45M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.45M
    }
420
922k
  }
421
230k
  HWY_ALIGN float block[4 * 8];
422
230k
  AFVIDCT4x4(coeff, block);
423
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.61M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.68M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.68M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.68M
    }
428
922k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
230k
  block[0] = dcs[1];
431
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.61M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.68M
      if (ix == 0 && iy == 0) continue;
434
3.45M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.45M
    }
436
922k
  }
437
230k
  ComputeScaledIDCT<4, 4>()(
438
230k
      block,
439
230k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
230k
            pixels_stride),
441
230k
      scratch_space);
442
  // IDCT4x8.
443
230k
  block[0] = dcs[2];
444
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.30M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.37M
      if (ix == 0 && iy == 0) continue;
447
7.14M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.14M
    }
449
922k
  }
450
230k
  ComputeScaledIDCT<4, 8>()(
451
230k
      block,
452
230k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
230k
      scratch_space);
454
230k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
173M
                                        float* scratch_space) {
461
173M
  using Type = AcStrategyType;
462
173M
  switch (strategy) {
463
15.6M
    case Type::IDENTITY: {
464
15.6M
      float dcs[4] = {};
465
15.6M
      float block00 = coefficients[0];
466
15.6M
      float block01 = coefficients[1];
467
15.6M
      float block10 = coefficients[8];
468
15.6M
      float block11 = coefficients[9];
469
15.6M
      dcs[0] = block00 + block01 + block10 + block11;
470
15.6M
      dcs[1] = block00 + block01 - block10 - block11;
471
15.6M
      dcs[2] = block00 - block01 + block10 - block11;
472
15.6M
      dcs[3] = block00 - block01 - block10 + block11;
473
47.0M
      for (size_t y = 0; y < 2; y++) {
474
94.0M
        for (size_t x = 0; x < 2; x++) {
475
62.7M
          float block_dc = dcs[y * 2 + x];
476
62.7M
          float residual_sum = 0;
477
313M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.25G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.00G
              if (ix == 0 && iy == 0) continue;
480
940M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
940M
            }
482
250M
          }
483
62.7M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
62.7M
              block_dc - residual_sum * (1.0f / 16);
485
313M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.25G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.00G
              if (ix == 1 && iy == 1) continue;
488
940M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
940M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
940M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
940M
            }
492
250M
          }
493
62.7M
          pixels[y * 4 * pixels_stride + x * 4] =
494
62.7M
              coefficients[(y + 2) * 8 + x + 2] +
495
62.7M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
62.7M
        }
497
31.3M
      }
498
15.6M
      break;
499
0
    }
500
12.8M
    case Type::DCT8X4: {
501
12.8M
      float dcs[2] = {};
502
12.8M
      float block0 = coefficients[0];
503
12.8M
      float block1 = coefficients[8];
504
12.8M
      dcs[0] = block0 + block1;
505
12.8M
      dcs[1] = block0 - block1;
506
38.5M
      for (size_t x = 0; x < 2; x++) {
507
25.6M
        HWY_ALIGN float block[4 * 8];
508
25.6M
        block[0] = dcs[x];
509
128M
        for (size_t iy = 0; iy < 4; iy++) {
510
924M
          for (size_t ix = 0; ix < 8; ix++) {
511
822M
            if (ix == 0 && iy == 0) continue;
512
796M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
796M
          }
514
102M
        }
515
25.6M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
25.6M
                                  scratch_space);
517
25.6M
      }
518
12.8M
      break;
519
0
    }
520
12.5M
    case Type::DCT4X8: {
521
12.5M
      float dcs[2] = {};
522
12.5M
      float block0 = coefficients[0];
523
12.5M
      float block1 = coefficients[8];
524
12.5M
      dcs[0] = block0 + block1;
525
12.5M
      dcs[1] = block0 - block1;
526
37.7M
      for (size_t y = 0; y < 2; y++) {
527
25.1M
        HWY_ALIGN float block[4 * 8];
528
25.1M
        block[0] = dcs[y];
529
125M
        for (size_t iy = 0; iy < 4; iy++) {
530
907M
          for (size_t ix = 0; ix < 8; ix++) {
531
806M
            if (ix == 0 && iy == 0) continue;
532
781M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
781M
          }
534
100M
        }
535
25.1M
        ComputeScaledIDCT<4, 8>()(
536
25.1M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
25.1M
            scratch_space);
538
25.1M
      }
539
12.5M
      break;
540
0
    }
541
12.3M
    case Type::DCT4X4: {
542
12.3M
      float dcs[4] = {};
543
12.3M
      float block00 = coefficients[0];
544
12.3M
      float block01 = coefficients[1];
545
12.3M
      float block10 = coefficients[8];
546
12.3M
      float block11 = coefficients[9];
547
12.3M
      dcs[0] = block00 + block01 + block10 + block11;
548
12.3M
      dcs[1] = block00 + block01 - block10 - block11;
549
12.3M
      dcs[2] = block00 - block01 + block10 - block11;
550
12.3M
      dcs[3] = block00 - block01 - block10 + block11;
551
37.1M
      for (size_t y = 0; y < 2; y++) {
552
74.3M
        for (size_t x = 0; x < 2; x++) {
553
49.5M
          HWY_ALIGN float block[4 * 4];
554
49.5M
          block[0] = dcs[y * 2 + x];
555
247M
          for (size_t iy = 0; iy < 4; iy++) {
556
991M
            for (size_t ix = 0; ix < 4; ix++) {
557
793M
              if (ix == 0 && iy == 0) continue;
558
743M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
743M
            }
560
198M
          }
561
49.5M
          ComputeScaledIDCT<4, 4>()(
562
49.5M
              block,
563
49.5M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
49.5M
              scratch_space);
565
49.5M
        }
566
24.7M
      }
567
12.3M
      break;
568
0
    }
569
18.5M
    case Type::DCT2X2: {
570
18.5M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
18.5M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
18.5M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
18.5M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
18.5M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
166M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.33G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.18G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.18G
        }
579
148M
      }
580
18.5M
      break;
581
0
    }
582
5.53M
    case Type::DCT16X16: {
583
5.53M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.53M
                                  scratch_space);
585
5.53M
      break;
586
0
    }
587
10.6M
    case Type::DCT16X8: {
588
10.6M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
10.6M
                                 scratch_space);
590
10.6M
      break;
591
0
    }
592
10.6M
    case Type::DCT8X16: {
593
10.6M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
10.6M
                                 scratch_space);
595
10.6M
      break;
596
0
    }
597
42
    case Type::DCT32X8: {
598
42
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
42
                                 scratch_space);
600
42
      break;
601
0
    }
602
132
    case Type::DCT8X32: {
603
132
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
132
                                 scratch_space);
605
132
      break;
606
0
    }
607
2.12M
    case Type::DCT32X16: {
608
2.12M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.12M
                                  scratch_space);
610
2.12M
      break;
611
0
    }
612
2.11M
    case Type::DCT16X32: {
613
2.11M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.11M
                                  scratch_space);
615
2.11M
      break;
616
0
    }
617
1.27M
    case Type::DCT32X32: {
618
1.27M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.27M
                                  scratch_space);
620
1.27M
      break;
621
0
    }
622
17.3M
    case Type::DCT: {
623
17.3M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
17.3M
                                scratch_space);
625
17.3M
      break;
626
0
    }
627
12.7M
    case Type::AFV0: {
628
12.7M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
12.7M
      break;
630
0
    }
631
12.5M
    case Type::AFV1: {
632
12.5M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
12.5M
      break;
634
0
    }
635
12.6M
    case Type::AFV2: {
636
12.6M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
12.6M
      break;
638
0
    }
639
12.6M
    case Type::AFV3: {
640
12.6M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
12.6M
      break;
642
0
    }
643
635k
    case Type::DCT64X32: {
644
635k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
635k
                                  scratch_space);
646
635k
      break;
647
0
    }
648
388k
    case Type::DCT32X64: {
649
388k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
388k
                                  scratch_space);
651
388k
      break;
652
0
    }
653
317k
    case Type::DCT64X64: {
654
317k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
317k
                                  scratch_space);
656
317k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
12
    case Type::DCT128X128: {
669
12
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
12
                                    scratch_space);
671
12
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
173M
  }
689
173M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
154M
                                        float* scratch_space) {
461
154M
  using Type = AcStrategyType;
462
154M
  switch (strategy) {
463
12.3M
    case Type::IDENTITY: {
464
12.3M
      float dcs[4] = {};
465
12.3M
      float block00 = coefficients[0];
466
12.3M
      float block01 = coefficients[1];
467
12.3M
      float block10 = coefficients[8];
468
12.3M
      float block11 = coefficients[9];
469
12.3M
      dcs[0] = block00 + block01 + block10 + block11;
470
12.3M
      dcs[1] = block00 + block01 - block10 - block11;
471
12.3M
      dcs[2] = block00 - block01 + block10 - block11;
472
12.3M
      dcs[3] = block00 - block01 - block10 + block11;
473
37.1M
      for (size_t y = 0; y < 2; y++) {
474
74.3M
        for (size_t x = 0; x < 2; x++) {
475
49.5M
          float block_dc = dcs[y * 2 + x];
476
49.5M
          float residual_sum = 0;
477
247M
          for (size_t iy = 0; iy < 4; iy++) {
478
991M
            for (size_t ix = 0; ix < 4; ix++) {
479
792M
              if (ix == 0 && iy == 0) continue;
480
743M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
743M
            }
482
198M
          }
483
49.5M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
49.5M
              block_dc - residual_sum * (1.0f / 16);
485
247M
          for (size_t iy = 0; iy < 4; iy++) {
486
991M
            for (size_t ix = 0; ix < 4; ix++) {
487
792M
              if (ix == 1 && iy == 1) continue;
488
743M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
743M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
743M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
743M
            }
492
198M
          }
493
49.5M
          pixels[y * 4 * pixels_stride + x * 4] =
494
49.5M
              coefficients[(y + 2) * 8 + x + 2] +
495
49.5M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
49.5M
        }
497
24.7M
      }
498
12.3M
      break;
499
0
    }
500
12.3M
    case Type::DCT8X4: {
501
12.3M
      float dcs[2] = {};
502
12.3M
      float block0 = coefficients[0];
503
12.3M
      float block1 = coefficients[8];
504
12.3M
      dcs[0] = block0 + block1;
505
12.3M
      dcs[1] = block0 - block1;
506
37.1M
      for (size_t x = 0; x < 2; x++) {
507
24.7M
        HWY_ALIGN float block[4 * 8];
508
24.7M
        block[0] = dcs[x];
509
123M
        for (size_t iy = 0; iy < 4; iy++) {
510
891M
          for (size_t ix = 0; ix < 8; ix++) {
511
792M
            if (ix == 0 && iy == 0) continue;
512
768M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
768M
          }
514
99.1M
        }
515
24.7M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
24.7M
                                  scratch_space);
517
24.7M
      }
518
12.3M
      break;
519
0
    }
520
12.3M
    case Type::DCT4X8: {
521
12.3M
      float dcs[2] = {};
522
12.3M
      float block0 = coefficients[0];
523
12.3M
      float block1 = coefficients[8];
524
12.3M
      dcs[0] = block0 + block1;
525
12.3M
      dcs[1] = block0 - block1;
526
37.1M
      for (size_t y = 0; y < 2; y++) {
527
24.7M
        HWY_ALIGN float block[4 * 8];
528
24.7M
        block[0] = dcs[y];
529
123M
        for (size_t iy = 0; iy < 4; iy++) {
530
891M
          for (size_t ix = 0; ix < 8; ix++) {
531
792M
            if (ix == 0 && iy == 0) continue;
532
768M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
768M
          }
534
99.1M
        }
535
24.7M
        ComputeScaledIDCT<4, 8>()(
536
24.7M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
24.7M
            scratch_space);
538
24.7M
      }
539
12.3M
      break;
540
0
    }
541
12.3M
    case Type::DCT4X4: {
542
12.3M
      float dcs[4] = {};
543
12.3M
      float block00 = coefficients[0];
544
12.3M
      float block01 = coefficients[1];
545
12.3M
      float block10 = coefficients[8];
546
12.3M
      float block11 = coefficients[9];
547
12.3M
      dcs[0] = block00 + block01 + block10 + block11;
548
12.3M
      dcs[1] = block00 + block01 - block10 - block11;
549
12.3M
      dcs[2] = block00 - block01 + block10 - block11;
550
12.3M
      dcs[3] = block00 - block01 - block10 + block11;
551
37.1M
      for (size_t y = 0; y < 2; y++) {
552
74.3M
        for (size_t x = 0; x < 2; x++) {
553
49.5M
          HWY_ALIGN float block[4 * 4];
554
49.5M
          block[0] = dcs[y * 2 + x];
555
247M
          for (size_t iy = 0; iy < 4; iy++) {
556
991M
            for (size_t ix = 0; ix < 4; ix++) {
557
792M
              if (ix == 0 && iy == 0) continue;
558
743M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
743M
            }
560
198M
          }
561
49.5M
          ComputeScaledIDCT<4, 4>()(
562
49.5M
              block,
563
49.5M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
49.5M
              scratch_space);
565
49.5M
        }
566
24.7M
      }
567
12.3M
      break;
568
0
    }
569
12.3M
    case Type::DCT2X2: {
570
12.3M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
12.3M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
12.3M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
12.3M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
12.3M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
111M
      for (size_t y = 0; y < kBlockDim; y++) {
576
891M
        for (size_t x = 0; x < kBlockDim; x++) {
577
792M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
792M
        }
579
99.1M
      }
580
12.3M
      break;
581
0
    }
582
5.03M
    case Type::DCT16X16: {
583
5.03M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.03M
                                  scratch_space);
585
5.03M
      break;
586
0
    }
587
9.94M
    case Type::DCT16X8: {
588
9.94M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.94M
                                 scratch_space);
590
9.94M
      break;
591
0
    }
592
9.92M
    case Type::DCT8X16: {
593
9.92M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
9.92M
                                 scratch_space);
595
9.92M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
1.97M
    case Type::DCT32X16: {
608
1.97M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.97M
                                  scratch_space);
610
1.97M
      break;
611
0
    }
612
1.95M
    case Type::DCT16X32: {
613
1.95M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.95M
                                  scratch_space);
615
1.95M
      break;
616
0
    }
617
999k
    case Type::DCT32X32: {
618
999k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
999k
                                  scratch_space);
620
999k
      break;
621
0
    }
622
12.3M
    case Type::DCT: {
623
12.3M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
12.3M
                                scratch_space);
625
12.3M
      break;
626
0
    }
627
12.3M
    case Type::AFV0: {
628
12.3M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
12.3M
      break;
630
0
    }
631
12.3M
    case Type::AFV1: {
632
12.3M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
12.3M
      break;
634
0
    }
635
12.3M
    case Type::AFV2: {
636
12.3M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
12.3M
      break;
638
0
    }
639
12.3M
    case Type::AFV3: {
640
12.3M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
12.3M
      break;
642
0
    }
643
599k
    case Type::DCT64X32: {
644
599k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
599k
                                  scratch_space);
646
599k
      break;
647
0
    }
648
370k
    case Type::DCT32X64: {
649
370k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
370k
                                  scratch_space);
651
370k
      break;
652
0
    }
653
171k
    case Type::DCT64X64: {
654
171k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
171k
                                  scratch_space);
656
171k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
154M
  }
689
154M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
18.6M
                                        float* scratch_space) {
461
18.6M
  using Type = AcStrategyType;
462
18.6M
  switch (strategy) {
463
3.29M
    case Type::IDENTITY: {
464
3.29M
      float dcs[4] = {};
465
3.29M
      float block00 = coefficients[0];
466
3.29M
      float block01 = coefficients[1];
467
3.29M
      float block10 = coefficients[8];
468
3.29M
      float block11 = coefficients[9];
469
3.29M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.29M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.29M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.29M
      dcs[3] = block00 - block01 - block10 + block11;
473
9.87M
      for (size_t y = 0; y < 2; y++) {
474
19.7M
        for (size_t x = 0; x < 2; x++) {
475
13.1M
          float block_dc = dcs[y * 2 + x];
476
13.1M
          float residual_sum = 0;
477
65.8M
          for (size_t iy = 0; iy < 4; iy++) {
478
263M
            for (size_t ix = 0; ix < 4; ix++) {
479
210M
              if (ix == 0 && iy == 0) continue;
480
197M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
197M
            }
482
52.6M
          }
483
13.1M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
13.1M
              block_dc - residual_sum * (1.0f / 16);
485
65.8M
          for (size_t iy = 0; iy < 4; iy++) {
486
263M
            for (size_t ix = 0; ix < 4; ix++) {
487
210M
              if (ix == 1 && iy == 1) continue;
488
197M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
197M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
197M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
197M
            }
492
52.6M
          }
493
13.1M
          pixels[y * 4 * pixels_stride + x * 4] =
494
13.1M
              coefficients[(y + 2) * 8 + x + 2] +
495
13.1M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
13.1M
        }
497
6.58M
      }
498
3.29M
      break;
499
0
    }
500
457k
    case Type::DCT8X4: {
501
457k
      float dcs[2] = {};
502
457k
      float block0 = coefficients[0];
503
457k
      float block1 = coefficients[8];
504
457k
      dcs[0] = block0 + block1;
505
457k
      dcs[1] = block0 - block1;
506
1.37M
      for (size_t x = 0; x < 2; x++) {
507
915k
        HWY_ALIGN float block[4 * 8];
508
915k
        block[0] = dcs[x];
509
4.57M
        for (size_t iy = 0; iy < 4; iy++) {
510
32.9M
          for (size_t ix = 0; ix < 8; ix++) {
511
29.3M
            if (ix == 0 && iy == 0) continue;
512
28.3M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
28.3M
          }
514
3.66M
        }
515
915k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
915k
                                  scratch_space);
517
915k
      }
518
457k
      break;
519
0
    }
520
211k
    case Type::DCT4X8: {
521
211k
      float dcs[2] = {};
522
211k
      float block0 = coefficients[0];
523
211k
      float block1 = coefficients[8];
524
211k
      dcs[0] = block0 + block1;
525
211k
      dcs[1] = block0 - block1;
526
635k
      for (size_t y = 0; y < 2; y++) {
527
423k
        HWY_ALIGN float block[4 * 8];
528
423k
        block[0] = dcs[y];
529
2.11M
        for (size_t iy = 0; iy < 4; iy++) {
530
15.2M
          for (size_t ix = 0; ix < 8; ix++) {
531
13.5M
            if (ix == 0 && iy == 0) continue;
532
13.1M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
13.1M
          }
534
1.69M
        }
535
423k
        ComputeScaledIDCT<4, 8>()(
536
423k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
423k
            scratch_space);
538
423k
      }
539
211k
      break;
540
0
    }
541
5.64k
    case Type::DCT4X4: {
542
5.64k
      float dcs[4] = {};
543
5.64k
      float block00 = coefficients[0];
544
5.64k
      float block01 = coefficients[1];
545
5.64k
      float block10 = coefficients[8];
546
5.64k
      float block11 = coefficients[9];
547
5.64k
      dcs[0] = block00 + block01 + block10 + block11;
548
5.64k
      dcs[1] = block00 + block01 - block10 - block11;
549
5.64k
      dcs[2] = block00 - block01 + block10 - block11;
550
5.64k
      dcs[3] = block00 - block01 - block10 + block11;
551
16.9k
      for (size_t y = 0; y < 2; y++) {
552
33.8k
        for (size_t x = 0; x < 2; x++) {
553
22.5k
          HWY_ALIGN float block[4 * 4];
554
22.5k
          block[0] = dcs[y * 2 + x];
555
112k
          for (size_t iy = 0; iy < 4; iy++) {
556
451k
            for (size_t ix = 0; ix < 4; ix++) {
557
361k
              if (ix == 0 && iy == 0) continue;
558
338k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
338k
            }
560
90.3k
          }
561
22.5k
          ComputeScaledIDCT<4, 4>()(
562
22.5k
              block,
563
22.5k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
22.5k
              scratch_space);
565
22.5k
        }
566
11.2k
      }
567
5.64k
      break;
568
0
    }
569
6.13M
    case Type::DCT2X2: {
570
6.13M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
6.13M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
6.13M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
6.13M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
6.13M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
55.2M
      for (size_t y = 0; y < kBlockDim; y++) {
576
441M
        for (size_t x = 0; x < kBlockDim; x++) {
577
392M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
392M
        }
579
49.0M
      }
580
6.13M
      break;
581
0
    }
582
499k
    case Type::DCT16X16: {
583
499k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
499k
                                  scratch_space);
585
499k
      break;
586
0
    }
587
691k
    case Type::DCT16X8: {
588
691k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
691k
                                 scratch_space);
590
691k
      break;
591
0
    }
592
742k
    case Type::DCT8X16: {
593
742k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
742k
                                 scratch_space);
595
742k
      break;
596
0
    }
597
42
    case Type::DCT32X8: {
598
42
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
42
                                 scratch_space);
600
42
      break;
601
0
    }
602
132
    case Type::DCT8X32: {
603
132
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
132
                                 scratch_space);
605
132
      break;
606
0
    }
607
153k
    case Type::DCT32X16: {
608
153k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
153k
                                  scratch_space);
610
153k
      break;
611
0
    }
612
158k
    case Type::DCT16X32: {
613
158k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
158k
                                  scratch_space);
615
158k
      break;
616
0
    }
617
270k
    case Type::DCT32X32: {
618
270k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
270k
                                  scratch_space);
620
270k
      break;
621
0
    }
622
4.91M
    case Type::DCT: {
623
4.91M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
4.91M
                                scratch_space);
625
4.91M
      break;
626
0
    }
627
313k
    case Type::AFV0: {
628
313k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
313k
      break;
630
0
    }
631
172k
    case Type::AFV1: {
632
172k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
172k
      break;
634
0
    }
635
219k
    case Type::AFV2: {
636
219k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
219k
      break;
638
0
    }
639
230k
    case Type::AFV3: {
640
230k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
230k
      break;
642
0
    }
643
35.4k
    case Type::DCT64X32: {
644
35.4k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
35.4k
                                  scratch_space);
646
35.4k
      break;
647
0
    }
648
18.6k
    case Type::DCT32X64: {
649
18.6k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
18.6k
                                  scratch_space);
651
18.6k
      break;
652
0
    }
653
145k
    case Type::DCT64X64: {
654
145k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
145k
                                  scratch_space);
656
145k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
12
    case Type::DCT128X128: {
669
12
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
12
                                    scratch_space);
671
12
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
18.6M
  }
689
18.6M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
18.9M
                                              float* JXL_RESTRICT scratch) {
695
18.9M
  using Type = AcStrategyType;
696
18.9M
  HWY_ALIGN float warm_block[4 * 4];
697
18.9M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
18.9M
  switch (strategy) {
699
691k
    case Type::DCT16X8: {
700
691k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
691k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
691k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
691k
      break;
704
0
    }
705
742k
    case Type::DCT8X16: {
706
742k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
742k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
742k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
742k
      break;
710
0
    }
711
499k
    case Type::DCT16X16: {
712
499k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
499k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
499k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
499k
      break;
716
0
    }
717
42
    case Type::DCT32X8: {
718
42
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
42
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
42
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
42
      break;
722
0
    }
723
132
    case Type::DCT8X32: {
724
132
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
132
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
132
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
132
      break;
728
0
    }
729
153k
    case Type::DCT32X16: {
730
153k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
153k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
153k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
153k
      break;
734
0
    }
735
158k
    case Type::DCT16X32: {
736
158k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
158k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
158k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
158k
      break;
740
0
    }
741
270k
    case Type::DCT32X32: {
742
270k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
270k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
270k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
270k
      break;
746
0
    }
747
35.4k
    case Type::DCT64X32: {
748
35.4k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
35.4k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
35.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
35.4k
      break;
752
0
    }
753
18.6k
    case Type::DCT32X64: {
754
18.6k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
18.6k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
18.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
18.6k
      break;
758
0
    }
759
145k
    case Type::DCT64X64: {
760
145k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
145k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
145k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
145k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
12
    case Type::DCT128X128: {
778
12
      ReinterpretingDCT<
779
12
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
12
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
12
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.93M
    case Type::DCT:
806
11.0M
    case Type::DCT2X2:
807
11.0M
    case Type::DCT4X4:
808
11.2M
    case Type::DCT4X8:
809
11.7M
    case Type::DCT8X4:
810
12.0M
    case Type::AFV0:
811
12.2M
    case Type::AFV1:
812
12.4M
    case Type::AFV2:
813
12.6M
    case Type::AFV3:
814
16.2M
    case Type::IDENTITY:
815
16.2M
      llf[0] = dc[0];
816
16.2M
      break;
817
18.9M
  };
818
18.9M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
18.9M
                                              float* JXL_RESTRICT scratch) {
695
18.9M
  using Type = AcStrategyType;
696
18.9M
  HWY_ALIGN float warm_block[4 * 4];
697
18.9M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
18.9M
  switch (strategy) {
699
691k
    case Type::DCT16X8: {
700
691k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
691k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
691k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
691k
      break;
704
0
    }
705
742k
    case Type::DCT8X16: {
706
742k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
742k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
742k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
742k
      break;
710
0
    }
711
499k
    case Type::DCT16X16: {
712
499k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
499k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
499k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
499k
      break;
716
0
    }
717
42
    case Type::DCT32X8: {
718
42
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
42
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
42
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
42
      break;
722
0
    }
723
132
    case Type::DCT8X32: {
724
132
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
132
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
132
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
132
      break;
728
0
    }
729
153k
    case Type::DCT32X16: {
730
153k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
153k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
153k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
153k
      break;
734
0
    }
735
158k
    case Type::DCT16X32: {
736
158k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
158k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
158k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
158k
      break;
740
0
    }
741
270k
    case Type::DCT32X32: {
742
270k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
270k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
270k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
270k
      break;
746
0
    }
747
35.4k
    case Type::DCT64X32: {
748
35.4k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
35.4k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
35.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
35.4k
      break;
752
0
    }
753
18.6k
    case Type::DCT32X64: {
754
18.6k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
18.6k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
18.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
18.6k
      break;
758
0
    }
759
145k
    case Type::DCT64X64: {
760
145k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
145k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
145k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
145k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
12
    case Type::DCT128X128: {
778
12
      ReinterpretingDCT<
779
12
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
12
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
12
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
12
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.93M
    case Type::DCT:
806
11.0M
    case Type::DCT2X2:
807
11.0M
    case Type::DCT4X4:
808
11.2M
    case Type::DCT4X8:
809
11.7M
    case Type::DCT8X4:
810
12.0M
    case Type::AFV0:
811
12.2M
    case Type::AFV1:
812
12.4M
    case Type::AFV2:
813
12.6M
    case Type::AFV3:
814
16.2M
    case Type::IDENTITY:
815
16.2M
      llf[0] = dc[0];
816
16.2M
      break;
817
18.9M
  };
818
18.9M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_