Coverage Report

Created: 2025-08-12 07:37

/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized rectangle in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
278k
                                  float* JXL_RESTRICT scratch_space) {
41
278k
  static_assert(LF_ROWS == ROWS,
42
278k
                "ReinterpretingDCT should only be called with LF == N");
43
278k
  static_assert(LF_COLS == COLS,
44
278k
                "ReinterpretingDCT should only be called with LF == N");
45
278k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
278k
                                 scratch_space);
47
278k
  if (ROWS < COLS) {
48
220k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
465k
      for (size_t x = 0; x < LF_COLS; x++) {
50
343k
        output[y * output_stride + x] =
51
343k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
343k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
343k
      }
54
122k
    }
55
179k
  } else {
56
598k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.90M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.48M
        output[y * output_stride + x] =
59
1.48M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.48M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.48M
      }
62
418k
    }
63
179k
  }
64
278k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
58.8k
                                  float* JXL_RESTRICT scratch_space) {
41
58.8k
  static_assert(LF_ROWS == ROWS,
42
58.8k
                "ReinterpretingDCT should only be called with LF == N");
43
58.8k
  static_assert(LF_COLS == COLS,
44
58.8k
                "ReinterpretingDCT should only be called with LF == N");
45
58.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
58.8k
                                 scratch_space);
47
58.8k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
58.8k
  } else {
56
117k
    for (size_t y = 0; y < LF_COLS; y++) {
57
176k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
117k
        output[y * output_stride + x] =
59
117k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
117k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
117k
      }
62
58.8k
    }
63
58.8k
  }
64
58.8k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
74.7k
                                  float* JXL_RESTRICT scratch_space) {
41
74.7k
  static_assert(LF_ROWS == ROWS,
42
74.7k
                "ReinterpretingDCT should only be called with LF == N");
43
74.7k
  static_assert(LF_COLS == COLS,
44
74.7k
                "ReinterpretingDCT should only be called with LF == N");
45
74.7k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
74.7k
                                 scratch_space);
47
74.7k
  if (ROWS < COLS) {
48
149k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
224k
      for (size_t x = 0; x < LF_COLS; x++) {
50
149k
        output[y * output_stride + x] =
51
149k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
149k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
149k
      }
54
74.7k
    }
55
74.7k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
74.7k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
50.5k
                                  float* JXL_RESTRICT scratch_space) {
41
50.5k
  static_assert(LF_ROWS == ROWS,
42
50.5k
                "ReinterpretingDCT should only be called with LF == N");
43
50.5k
  static_assert(LF_COLS == COLS,
44
50.5k
                "ReinterpretingDCT should only be called with LF == N");
45
50.5k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
50.5k
                                 scratch_space);
47
50.5k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
50.5k
  } else {
56
151k
    for (size_t y = 0; y < LF_COLS; y++) {
57
303k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
202k
        output[y * output_stride + x] =
59
202k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
202k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
202k
      }
62
101k
    }
63
50.5k
  }
64
50.5k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
19.3k
                                  float* JXL_RESTRICT scratch_space) {
41
19.3k
  static_assert(LF_ROWS == ROWS,
42
19.3k
                "ReinterpretingDCT should only be called with LF == N");
43
19.3k
  static_assert(LF_COLS == COLS,
44
19.3k
                "ReinterpretingDCT should only be called with LF == N");
45
19.3k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
19.3k
                                 scratch_space);
47
19.3k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
19.3k
  } else {
56
57.9k
    for (size_t y = 0; y < LF_COLS; y++) {
57
193k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
154k
        output[y * output_stride + x] =
59
154k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
154k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
154k
      }
62
38.6k
    }
63
19.3k
  }
64
19.3k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
23.0k
                                  float* JXL_RESTRICT scratch_space) {
41
23.0k
  static_assert(LF_ROWS == ROWS,
42
23.0k
                "ReinterpretingDCT should only be called with LF == N");
43
23.0k
  static_assert(LF_COLS == COLS,
44
23.0k
                "ReinterpretingDCT should only be called with LF == N");
45
23.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
23.0k
                                 scratch_space);
47
23.0k
  if (ROWS < COLS) {
48
69.2k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
230k
      for (size_t x = 0; x < LF_COLS; x++) {
50
184k
        output[y * output_stride + x] =
51
184k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
184k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
184k
      }
54
46.1k
    }
55
23.0k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
23.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
46.8k
                                  float* JXL_RESTRICT scratch_space) {
41
46.8k
  static_assert(LF_ROWS == ROWS,
42
46.8k
                "ReinterpretingDCT should only be called with LF == N");
43
46.8k
  static_assert(LF_COLS == COLS,
44
46.8k
                "ReinterpretingDCT should only be called with LF == N");
45
46.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
46.8k
                                 scratch_space);
47
46.8k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
46.8k
  } else {
56
234k
    for (size_t y = 0; y < LF_COLS; y++) {
57
936k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
749k
        output[y * output_stride + x] =
59
749k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
749k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
749k
      }
62
187k
    }
63
46.8k
  }
64
46.8k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
594
                                  float* JXL_RESTRICT scratch_space) {
41
594
  static_assert(LF_ROWS == ROWS,
42
594
                "ReinterpretingDCT should only be called with LF == N");
43
594
  static_assert(LF_COLS == COLS,
44
594
                "ReinterpretingDCT should only be called with LF == N");
45
594
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
594
                                 scratch_space);
47
594
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
594
  } else {
56
2.97k
    for (size_t y = 0; y < LF_COLS; y++) {
57
21.3k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
19.0k
        output[y * output_stride + x] =
59
19.0k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
19.0k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
19.0k
      }
62
2.37k
    }
63
594
  }
64
594
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
288
                                  float* JXL_RESTRICT scratch_space) {
41
288
  static_assert(LF_ROWS == ROWS,
42
288
                "ReinterpretingDCT should only be called with LF == N");
43
288
  static_assert(LF_COLS == COLS,
44
288
                "ReinterpretingDCT should only be called with LF == N");
45
288
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
288
                                 scratch_space);
47
288
  if (ROWS < COLS) {
48
1.44k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
10.3k
      for (size_t x = 0; x < LF_COLS; x++) {
50
9.21k
        output[y * output_stride + x] =
51
9.21k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
9.21k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
9.21k
      }
54
1.15k
    }
55
288
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
288
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3.80k
                                  float* JXL_RESTRICT scratch_space) {
41
3.80k
  static_assert(LF_ROWS == ROWS,
42
3.80k
                "ReinterpretingDCT should only be called with LF == N");
43
3.80k
  static_assert(LF_COLS == COLS,
44
3.80k
                "ReinterpretingDCT should only be called with LF == N");
45
3.80k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3.80k
                                 scratch_space);
47
3.80k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3.80k
  } else {
56
34.2k
    for (size_t y = 0; y < LF_COLS; y++) {
57
274k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
243k
        output[y * output_stride + x] =
59
243k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
243k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
243k
      }
62
30.4k
    }
63
3.80k
  }
64
3.80k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
4.41M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
4.41M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
4.41M
  static_assert(S % 2 == 0, "S should be even");
70
4.41M
  float temp[kDCTBlockSize];
71
4.41M
  constexpr size_t num_2x2 = S / 2;
72
14.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
41.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
30.8M
      float c00 = block[y * kBlockDim + x];
75
30.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
30.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
30.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
30.8M
      float r00 = c00 + c01 + c10 + c11;
79
30.8M
      float r01 = c00 + c01 - c10 - c11;
80
30.8M
      float r10 = c00 - c01 + c10 - c11;
81
30.8M
      float r11 = c00 - c01 - c10 + c11;
82
30.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
30.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
30.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
30.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
30.8M
    }
87
10.2M
  }
88
24.9M
  for (size_t y = 0; y < S; y++) {
89
144M
    for (size_t x = 0; x < S; x++) {
90
123M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
123M
    }
92
20.5M
  }
93
4.41M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
2.11M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.11M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.05M
      float c00 = block[y * kBlockDim + x];
75
1.05M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
1.05M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
1.05M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
1.05M
      float r00 = c00 + c01 + c10 + c11;
79
1.05M
      float r01 = c00 + c01 - c10 - c11;
80
1.05M
      float r10 = c00 - c01 + c10 - c11;
81
1.05M
      float r11 = c00 - c01 - c10 + c11;
82
1.05M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
1.05M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
1.05M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
1.05M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
1.05M
    }
87
1.05M
  }
88
3.17M
  for (size_t y = 0; y < S; y++) {
89
6.35M
    for (size_t x = 0; x < S; x++) {
90
4.23M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
4.23M
    }
92
2.11M
  }
93
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
3.17M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.35M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.23M
      float c00 = block[y * kBlockDim + x];
75
4.23M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
4.23M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
4.23M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
4.23M
      float r00 = c00 + c01 + c10 + c11;
79
4.23M
      float r01 = c00 + c01 - c10 - c11;
80
4.23M
      float r10 = c00 - c01 + c10 - c11;
81
4.23M
      float r11 = c00 - c01 - c10 + c11;
82
4.23M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
4.23M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
4.23M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
4.23M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
4.23M
    }
87
2.11M
  }
88
5.29M
  for (size_t y = 0; y < S; y++) {
89
21.1M
    for (size_t x = 0; x < S; x++) {
90
16.9M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
16.9M
    }
92
4.23M
  }
93
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
5.29M
  for (size_t y = 0; y < num_2x2; y++) {
73
21.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.9M
      float c00 = block[y * kBlockDim + x];
75
16.9M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
16.9M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
16.9M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
16.9M
      float r00 = c00 + c01 + c10 + c11;
79
16.9M
      float r01 = c00 + c01 - c10 - c11;
80
16.9M
      float r10 = c00 - c01 + c10 - c11;
81
16.9M
      float r11 = c00 - c01 - c10 + c11;
82
16.9M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
16.9M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
16.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
16.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
16.9M
    }
87
4.23M
  }
88
9.53M
  for (size_t y = 0; y < S; y++) {
89
76.2M
    for (size_t x = 0; x < S; x++) {
90
67.8M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
67.8M
    }
92
8.47M
  }
93
1.05M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
410k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
410k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
410k
  static_assert(S % 2 == 0, "S should be even");
70
410k
  float temp[kDCTBlockSize];
71
410k
  constexpr size_t num_2x2 = S / 2;
72
821k
  for (size_t y = 0; y < num_2x2; y++) {
73
821k
    for (size_t x = 0; x < num_2x2; x++) {
74
410k
      float c00 = block[y * kBlockDim + x];
75
410k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
410k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
410k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
410k
      float r00 = c00 + c01 + c10 + c11;
79
410k
      float r01 = c00 + c01 - c10 - c11;
80
410k
      float r10 = c00 - c01 + c10 - c11;
81
410k
      float r11 = c00 - c01 - c10 + c11;
82
410k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
410k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
410k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
410k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
410k
    }
87
410k
  }
88
1.23M
  for (size_t y = 0; y < S; y++) {
89
2.46M
    for (size_t x = 0; x < S; x++) {
90
1.64M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.64M
    }
92
821k
  }
93
410k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
410k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
410k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
410k
  static_assert(S % 2 == 0, "S should be even");
70
410k
  float temp[kDCTBlockSize];
71
410k
  constexpr size_t num_2x2 = S / 2;
72
1.23M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.46M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.64M
      float c00 = block[y * kBlockDim + x];
75
1.64M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
1.64M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
1.64M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
1.64M
      float r00 = c00 + c01 + c10 + c11;
79
1.64M
      float r01 = c00 + c01 - c10 - c11;
80
1.64M
      float r10 = c00 - c01 + c10 - c11;
81
1.64M
      float r11 = c00 - c01 - c10 + c11;
82
1.64M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
1.64M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
1.64M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
1.64M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
1.64M
    }
87
821k
  }
88
2.05M
  for (size_t y = 0; y < S; y++) {
89
8.21M
    for (size_t x = 0; x < S; x++) {
90
6.56M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
6.56M
    }
92
1.64M
  }
93
410k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
410k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
410k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
410k
  static_assert(S % 2 == 0, "S should be even");
70
410k
  float temp[kDCTBlockSize];
71
410k
  constexpr size_t num_2x2 = S / 2;
72
2.05M
  for (size_t y = 0; y < num_2x2; y++) {
73
8.21M
    for (size_t x = 0; x < num_2x2; x++) {
74
6.56M
      float c00 = block[y * kBlockDim + x];
75
6.56M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
6.56M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
6.56M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
6.56M
      float r00 = c00 + c01 + c10 + c11;
79
6.56M
      float r01 = c00 + c01 - c10 - c11;
80
6.56M
      float r10 = c00 - c01 + c10 - c11;
81
6.56M
      float r11 = c00 - c01 - c10 + c11;
82
6.56M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
6.56M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
6.56M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
6.56M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
6.56M
    }
87
1.64M
  }
88
3.69M
  for (size_t y = 0; y < S; y++) {
89
29.5M
    for (size_t x = 0; x < S; x++) {
90
26.2M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
26.2M
    }
92
3.28M
  }
93
410k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
4.37M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
4.37M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
4.37M
      {
98
4.37M
          0.25,
99
4.37M
          0.25,
100
4.37M
          0.25,
101
4.37M
          0.25,
102
4.37M
          0.25,
103
4.37M
          0.25,
104
4.37M
          0.25,
105
4.37M
          0.25,
106
4.37M
          0.25,
107
4.37M
          0.25,
108
4.37M
          0.25,
109
4.37M
          0.25,
110
4.37M
          0.25,
111
4.37M
          0.25,
112
4.37M
          0.25,
113
4.37M
          0.25,
114
4.37M
      },
115
4.37M
      {
116
4.37M
          0.876902929799142f,
117
4.37M
          0.2206518106944235f,
118
4.37M
          -0.10140050393753763f,
119
4.37M
          -0.1014005039375375f,
120
4.37M
          0.2206518106944236f,
121
4.37M
          -0.10140050393753777f,
122
4.37M
          -0.10140050393753772f,
123
4.37M
          -0.10140050393753763f,
124
4.37M
          -0.10140050393753758f,
125
4.37M
          -0.10140050393753769f,
126
4.37M
          -0.1014005039375375f,
127
4.37M
          -0.10140050393753768f,
128
4.37M
          -0.10140050393753768f,
129
4.37M
          -0.10140050393753759f,
130
4.37M
          -0.10140050393753763f,
131
4.37M
          -0.10140050393753741f,
132
4.37M
      },
133
4.37M
      {
134
4.37M
          0.0,
135
4.37M
          0.0,
136
4.37M
          0.40670075830260755f,
137
4.37M
          0.44444816619734445f,
138
4.37M
          0.0,
139
4.37M
          0.0,
140
4.37M
          0.19574399372042936f,
141
4.37M
          0.2929100136981264f,
142
4.37M
          -0.40670075830260716f,
143
4.37M
          -0.19574399372042872f,
144
4.37M
          0.0,
145
4.37M
          0.11379074460448091f,
146
4.37M
          -0.44444816619734384f,
147
4.37M
          -0.29291001369812636f,
148
4.37M
          -0.1137907446044814f,
149
4.37M
          0.0,
150
4.37M
      },
151
4.37M
      {
152
4.37M
          0.0,
153
4.37M
          0.0,
154
4.37M
          -0.21255748058288748f,
155
4.37M
          0.3085497062849767f,
156
4.37M
          0.0,
157
4.37M
          0.4706702258572536f,
158
4.37M
          -0.1621205195722993f,
159
4.37M
          0.0,
160
4.37M
          -0.21255748058287047f,
161
4.37M
          -0.16212051957228327f,
162
4.37M
          -0.47067022585725277f,
163
4.37M
          -0.1464291867126764f,
164
4.37M
          0.3085497062849487f,
165
4.37M
          0.0,
166
4.37M
          -0.14642918671266536f,
167
4.37M
          0.4251149611657548f,
168
4.37M
      },
169
4.37M
      {
170
4.37M
          0.0,
171
4.37M
          -0.7071067811865474f,
172
4.37M
          0.0,
173
4.37M
          0.0,
174
4.37M
          0.7071067811865476f,
175
4.37M
          0.0,
176
4.37M
          0.0,
177
4.37M
          0.0,
178
4.37M
          0.0,
179
4.37M
          0.0,
180
4.37M
          0.0,
181
4.37M
          0.0,
182
4.37M
          0.0,
183
4.37M
          0.0,
184
4.37M
          0.0,
185
4.37M
          0.0,
186
4.37M
      },
187
4.37M
      {
188
4.37M
          -0.4105377591765233f,
189
4.37M
          0.6235485373547691f,
190
4.37M
          -0.06435071657946274f,
191
4.37M
          -0.06435071657946266f,
192
4.37M
          0.6235485373547694f,
193
4.37M
          -0.06435071657946284f,
194
4.37M
          -0.0643507165794628f,
195
4.37M
          -0.06435071657946274f,
196
4.37M
          -0.06435071657946272f,
197
4.37M
          -0.06435071657946279f,
198
4.37M
          -0.06435071657946266f,
199
4.37M
          -0.06435071657946277f,
200
4.37M
          -0.06435071657946277f,
201
4.37M
          -0.06435071657946273f,
202
4.37M
          -0.06435071657946274f,
203
4.37M
          -0.0643507165794626f,
204
4.37M
      },
205
4.37M
      {
206
4.37M
          0.0,
207
4.37M
          0.0,
208
4.37M
          -0.4517556589999482f,
209
4.37M
          0.15854503551840063f,
210
4.37M
          0.0,
211
4.37M
          -0.04038515160822202f,
212
4.37M
          0.0074182263792423875f,
213
4.37M
          0.39351034269210167f,
214
4.37M
          -0.45175565899994635f,
215
4.37M
          0.007418226379244351f,
216
4.37M
          0.1107416575309343f,
217
4.37M
          0.08298163094882051f,
218
4.37M
          0.15854503551839705f,
219
4.37M
          0.3935103426921022f,
220
4.37M
          0.0829816309488214f,
221
4.37M
          -0.45175565899994796f,
222
4.37M
      },
223
4.37M
      {
224
4.37M
          0.0,
225
4.37M
          0.0,
226
4.37M
          -0.304684750724869f,
227
4.37M
          0.5112616136591823f,
228
4.37M
          0.0,
229
4.37M
          0.0,
230
4.37M
          -0.290480129728998f,
231
4.37M
          -0.06578701549142804f,
232
4.37M
          0.304684750724884f,
233
4.37M
          0.2904801297290076f,
234
4.37M
          0.0,
235
4.37M
          -0.23889773523344604f,
236
4.37M
          -0.5112616136592012f,
237
4.37M
          0.06578701549142545f,
238
4.37M
          0.23889773523345467f,
239
4.37M
          0.0,
240
4.37M
      },
241
4.37M
      {
242
4.37M
          0.0,
243
4.37M
          0.0,
244
4.37M
          0.3017929516615495f,
245
4.37M
          0.25792362796341184f,
246
4.37M
          0.0,
247
4.37M
          0.16272340142866204f,
248
4.37M
          0.09520022653475037f,
249
4.37M
          0.0,
250
4.37M
          0.3017929516615503f,
251
4.37M
          0.09520022653475055f,
252
4.37M
          -0.16272340142866173f,
253
4.37M
          -0.35312385449816297f,
254
4.37M
          0.25792362796341295f,
255
4.37M
          0.0,
256
4.37M
          -0.3531238544981624f,
257
4.37M
          -0.6035859033230976f,
258
4.37M
      },
259
4.37M
      {
260
4.37M
          0.0,
261
4.37M
          0.0,
262
4.37M
          0.40824829046386274f,
263
4.37M
          0.0,
264
4.37M
          0.0,
265
4.37M
          0.0,
266
4.37M
          0.0,
267
4.37M
          -0.4082482904638628f,
268
4.37M
          -0.4082482904638635f,
269
4.37M
          0.0,
270
4.37M
          0.0,
271
4.37M
          -0.40824829046386296f,
272
4.37M
          0.0,
273
4.37M
          0.4082482904638634f,
274
4.37M
          0.408248290463863f,
275
4.37M
          0.0,
276
4.37M
      },
277
4.37M
      {
278
4.37M
          0.0,
279
4.37M
          0.0,
280
4.37M
          0.1747866975480809f,
281
4.37M
          0.0812611176717539f,
282
4.37M
          0.0,
283
4.37M
          0.0,
284
4.37M
          -0.3675398009862027f,
285
4.37M
          -0.307882213957909f,
286
4.37M
          -0.17478669754808135f,
287
4.37M
          0.3675398009862011f,
288
4.37M
          0.0,
289
4.37M
          0.4826689115059883f,
290
4.37M
          -0.08126111767175039f,
291
4.37M
          0.30788221395790305f,
292
4.37M
          -0.48266891150598584f,
293
4.37M
          0.0,
294
4.37M
      },
295
4.37M
      {
296
4.37M
          0.0,
297
4.37M
          0.0,
298
4.37M
          -0.21105601049335784f,
299
4.37M
          0.18567180916109802f,
300
4.37M
          0.0,
301
4.37M
          0.0,
302
4.37M
          0.49215859013738733f,
303
4.37M
          -0.38525013709251915f,
304
4.37M
          0.21105601049335806f,
305
4.37M
          -0.49215859013738905f,
306
4.37M
          0.0,
307
4.37M
          0.17419412659916217f,
308
4.37M
          -0.18567180916109904f,
309
4.37M
          0.3852501370925211f,
310
4.37M
          -0.1741941265991621f,
311
4.37M
          0.0,
312
4.37M
      },
313
4.37M
      {
314
4.37M
          0.0,
315
4.37M
          0.0,
316
4.37M
          -0.14266084808807264f,
317
4.37M
          -0.3416446842253372f,
318
4.37M
          0.0,
319
4.37M
          0.7367497537172237f,
320
4.37M
          0.24627107722075148f,
321
4.37M
          -0.08574019035519306f,
322
4.37M
          -0.14266084808807344f,
323
4.37M
          0.24627107722075137f,
324
4.37M
          0.14883399227113567f,
325
4.37M
          -0.04768680350229251f,
326
4.37M
          -0.3416446842253373f,
327
4.37M
          -0.08574019035519267f,
328
4.37M
          -0.047686803502292804f,
329
4.37M
          -0.14266084808807242f,
330
4.37M
      },
331
4.37M
      {
332
4.37M
          0.0,
333
4.37M
          0.0,
334
4.37M
          -0.13813540350758585f,
335
4.37M
          0.3302282550303788f,
336
4.37M
          0.0,
337
4.37M
          0.08755115000587084f,
338
4.37M
          -0.07946706605909573f,
339
4.37M
          -0.4613374887461511f,
340
4.37M
          -0.13813540350758294f,
341
4.37M
          -0.07946706605910261f,
342
4.37M
          0.49724647109535086f,
343
4.37M
          0.12538059448563663f,
344
4.37M
          0.3302282550303805f,
345
4.37M
          -0.4613374887461554f,
346
4.37M
          0.12538059448564315f,
347
4.37M
          -0.13813540350758452f,
348
4.37M
      },
349
4.37M
      {
350
4.37M
          0.0,
351
4.37M
          0.0,
352
4.37M
          -0.17437602599651067f,
353
4.37M
          0.0702790691196284f,
354
4.37M
          0.0,
355
4.37M
          -0.2921026642334881f,
356
4.37M
          0.3623817333531167f,
357
4.37M
          0.0,
358
4.37M
          -0.1743760259965108f,
359
4.37M
          0.36238173335311646f,
360
4.37M
          0.29210266423348785f,
361
4.37M
          -0.4326608024727445f,
362
4.37M
          0.07027906911962818f,
363
4.37M
          0.0,
364
4.37M
          -0.4326608024727457f,
365
4.37M
          0.34875205199302267f,
366
4.37M
      },
367
4.37M
      {
368
4.37M
          0.0,
369
4.37M
          0.0,
370
4.37M
          0.11354987314994337f,
371
4.37M
          -0.07417504595810355f,
372
4.37M
          0.0,
373
4.37M
          0.19402893032594343f,
374
4.37M
          -0.435190496523228f,
375
4.37M
          0.21918684838857466f,
376
4.37M
          0.11354987314994257f,
377
4.37M
          -0.4351904965232251f,
378
4.37M
          0.5550443808910661f,
379
4.37M
          -0.25468277124066463f,
380
4.37M
          -0.07417504595810233f,
381
4.37M
          0.2191868483885728f,
382
4.37M
          -0.25468277124066413f,
383
4.37M
          0.1135498731499429f,
384
4.37M
      },
385
4.37M
  };
386
387
4.37M
  const HWY_CAPPED(float, 16) d;
388
13.1M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
8.75M
    auto pixel = Zero(d);
390
148M
    for (size_t j = 0; j < 16; j++) {
391
140M
      auto cf = Set(d, coeffs[j]);
392
140M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
140M
      pixel = MulAdd(cf, basis, pixel);
394
140M
    }
395
8.75M
    Store(pixel, d, pixels + i);
396
8.75M
  }
397
4.37M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
4.23M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
4.23M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
4.23M
      {
98
4.23M
          0.25,
99
4.23M
          0.25,
100
4.23M
          0.25,
101
4.23M
          0.25,
102
4.23M
          0.25,
103
4.23M
          0.25,
104
4.23M
          0.25,
105
4.23M
          0.25,
106
4.23M
          0.25,
107
4.23M
          0.25,
108
4.23M
          0.25,
109
4.23M
          0.25,
110
4.23M
          0.25,
111
4.23M
          0.25,
112
4.23M
          0.25,
113
4.23M
          0.25,
114
4.23M
      },
115
4.23M
      {
116
4.23M
          0.876902929799142f,
117
4.23M
          0.2206518106944235f,
118
4.23M
          -0.10140050393753763f,
119
4.23M
          -0.1014005039375375f,
120
4.23M
          0.2206518106944236f,
121
4.23M
          -0.10140050393753777f,
122
4.23M
          -0.10140050393753772f,
123
4.23M
          -0.10140050393753763f,
124
4.23M
          -0.10140050393753758f,
125
4.23M
          -0.10140050393753769f,
126
4.23M
          -0.1014005039375375f,
127
4.23M
          -0.10140050393753768f,
128
4.23M
          -0.10140050393753768f,
129
4.23M
          -0.10140050393753759f,
130
4.23M
          -0.10140050393753763f,
131
4.23M
          -0.10140050393753741f,
132
4.23M
      },
133
4.23M
      {
134
4.23M
          0.0,
135
4.23M
          0.0,
136
4.23M
          0.40670075830260755f,
137
4.23M
          0.44444816619734445f,
138
4.23M
          0.0,
139
4.23M
          0.0,
140
4.23M
          0.19574399372042936f,
141
4.23M
          0.2929100136981264f,
142
4.23M
          -0.40670075830260716f,
143
4.23M
          -0.19574399372042872f,
144
4.23M
          0.0,
145
4.23M
          0.11379074460448091f,
146
4.23M
          -0.44444816619734384f,
147
4.23M
          -0.29291001369812636f,
148
4.23M
          -0.1137907446044814f,
149
4.23M
          0.0,
150
4.23M
      },
151
4.23M
      {
152
4.23M
          0.0,
153
4.23M
          0.0,
154
4.23M
          -0.21255748058288748f,
155
4.23M
          0.3085497062849767f,
156
4.23M
          0.0,
157
4.23M
          0.4706702258572536f,
158
4.23M
          -0.1621205195722993f,
159
4.23M
          0.0,
160
4.23M
          -0.21255748058287047f,
161
4.23M
          -0.16212051957228327f,
162
4.23M
          -0.47067022585725277f,
163
4.23M
          -0.1464291867126764f,
164
4.23M
          0.3085497062849487f,
165
4.23M
          0.0,
166
4.23M
          -0.14642918671266536f,
167
4.23M
          0.4251149611657548f,
168
4.23M
      },
169
4.23M
      {
170
4.23M
          0.0,
171
4.23M
          -0.7071067811865474f,
172
4.23M
          0.0,
173
4.23M
          0.0,
174
4.23M
          0.7071067811865476f,
175
4.23M
          0.0,
176
4.23M
          0.0,
177
4.23M
          0.0,
178
4.23M
          0.0,
179
4.23M
          0.0,
180
4.23M
          0.0,
181
4.23M
          0.0,
182
4.23M
          0.0,
183
4.23M
          0.0,
184
4.23M
          0.0,
185
4.23M
          0.0,
186
4.23M
      },
187
4.23M
      {
188
4.23M
          -0.4105377591765233f,
189
4.23M
          0.6235485373547691f,
190
4.23M
          -0.06435071657946274f,
191
4.23M
          -0.06435071657946266f,
192
4.23M
          0.6235485373547694f,
193
4.23M
          -0.06435071657946284f,
194
4.23M
          -0.0643507165794628f,
195
4.23M
          -0.06435071657946274f,
196
4.23M
          -0.06435071657946272f,
197
4.23M
          -0.06435071657946279f,
198
4.23M
          -0.06435071657946266f,
199
4.23M
          -0.06435071657946277f,
200
4.23M
          -0.06435071657946277f,
201
4.23M
          -0.06435071657946273f,
202
4.23M
          -0.06435071657946274f,
203
4.23M
          -0.0643507165794626f,
204
4.23M
      },
205
4.23M
      {
206
4.23M
          0.0,
207
4.23M
          0.0,
208
4.23M
          -0.4517556589999482f,
209
4.23M
          0.15854503551840063f,
210
4.23M
          0.0,
211
4.23M
          -0.04038515160822202f,
212
4.23M
          0.0074182263792423875f,
213
4.23M
          0.39351034269210167f,
214
4.23M
          -0.45175565899994635f,
215
4.23M
          0.007418226379244351f,
216
4.23M
          0.1107416575309343f,
217
4.23M
          0.08298163094882051f,
218
4.23M
          0.15854503551839705f,
219
4.23M
          0.3935103426921022f,
220
4.23M
          0.0829816309488214f,
221
4.23M
          -0.45175565899994796f,
222
4.23M
      },
223
4.23M
      {
224
4.23M
          0.0,
225
4.23M
          0.0,
226
4.23M
          -0.304684750724869f,
227
4.23M
          0.5112616136591823f,
228
4.23M
          0.0,
229
4.23M
          0.0,
230
4.23M
          -0.290480129728998f,
231
4.23M
          -0.06578701549142804f,
232
4.23M
          0.304684750724884f,
233
4.23M
          0.2904801297290076f,
234
4.23M
          0.0,
235
4.23M
          -0.23889773523344604f,
236
4.23M
          -0.5112616136592012f,
237
4.23M
          0.06578701549142545f,
238
4.23M
          0.23889773523345467f,
239
4.23M
          0.0,
240
4.23M
      },
241
4.23M
      {
242
4.23M
          0.0,
243
4.23M
          0.0,
244
4.23M
          0.3017929516615495f,
245
4.23M
          0.25792362796341184f,
246
4.23M
          0.0,
247
4.23M
          0.16272340142866204f,
248
4.23M
          0.09520022653475037f,
249
4.23M
          0.0,
250
4.23M
          0.3017929516615503f,
251
4.23M
          0.09520022653475055f,
252
4.23M
          -0.16272340142866173f,
253
4.23M
          -0.35312385449816297f,
254
4.23M
          0.25792362796341295f,
255
4.23M
          0.0,
256
4.23M
          -0.3531238544981624f,
257
4.23M
          -0.6035859033230976f,
258
4.23M
      },
259
4.23M
      {
260
4.23M
          0.0,
261
4.23M
          0.0,
262
4.23M
          0.40824829046386274f,
263
4.23M
          0.0,
264
4.23M
          0.0,
265
4.23M
          0.0,
266
4.23M
          0.0,
267
4.23M
          -0.4082482904638628f,
268
4.23M
          -0.4082482904638635f,
269
4.23M
          0.0,
270
4.23M
          0.0,
271
4.23M
          -0.40824829046386296f,
272
4.23M
          0.0,
273
4.23M
          0.4082482904638634f,
274
4.23M
          0.408248290463863f,
275
4.23M
          0.0,
276
4.23M
      },
277
4.23M
      {
278
4.23M
          0.0,
279
4.23M
          0.0,
280
4.23M
          0.1747866975480809f,
281
4.23M
          0.0812611176717539f,
282
4.23M
          0.0,
283
4.23M
          0.0,
284
4.23M
          -0.3675398009862027f,
285
4.23M
          -0.307882213957909f,
286
4.23M
          -0.17478669754808135f,
287
4.23M
          0.3675398009862011f,
288
4.23M
          0.0,
289
4.23M
          0.4826689115059883f,
290
4.23M
          -0.08126111767175039f,
291
4.23M
          0.30788221395790305f,
292
4.23M
          -0.48266891150598584f,
293
4.23M
          0.0,
294
4.23M
      },
295
4.23M
      {
296
4.23M
          0.0,
297
4.23M
          0.0,
298
4.23M
          -0.21105601049335784f,
299
4.23M
          0.18567180916109802f,
300
4.23M
          0.0,
301
4.23M
          0.0,
302
4.23M
          0.49215859013738733f,
303
4.23M
          -0.38525013709251915f,
304
4.23M
          0.21105601049335806f,
305
4.23M
          -0.49215859013738905f,
306
4.23M
          0.0,
307
4.23M
          0.17419412659916217f,
308
4.23M
          -0.18567180916109904f,
309
4.23M
          0.3852501370925211f,
310
4.23M
          -0.1741941265991621f,
311
4.23M
          0.0,
312
4.23M
      },
313
4.23M
      {
314
4.23M
          0.0,
315
4.23M
          0.0,
316
4.23M
          -0.14266084808807264f,
317
4.23M
          -0.3416446842253372f,
318
4.23M
          0.0,
319
4.23M
          0.7367497537172237f,
320
4.23M
          0.24627107722075148f,
321
4.23M
          -0.08574019035519306f,
322
4.23M
          -0.14266084808807344f,
323
4.23M
          0.24627107722075137f,
324
4.23M
          0.14883399227113567f,
325
4.23M
          -0.04768680350229251f,
326
4.23M
          -0.3416446842253373f,
327
4.23M
          -0.08574019035519267f,
328
4.23M
          -0.047686803502292804f,
329
4.23M
          -0.14266084808807242f,
330
4.23M
      },
331
4.23M
      {
332
4.23M
          0.0,
333
4.23M
          0.0,
334
4.23M
          -0.13813540350758585f,
335
4.23M
          0.3302282550303788f,
336
4.23M
          0.0,
337
4.23M
          0.08755115000587084f,
338
4.23M
          -0.07946706605909573f,
339
4.23M
          -0.4613374887461511f,
340
4.23M
          -0.13813540350758294f,
341
4.23M
          -0.07946706605910261f,
342
4.23M
          0.49724647109535086f,
343
4.23M
          0.12538059448563663f,
344
4.23M
          0.3302282550303805f,
345
4.23M
          -0.4613374887461554f,
346
4.23M
          0.12538059448564315f,
347
4.23M
          -0.13813540350758452f,
348
4.23M
      },
349
4.23M
      {
350
4.23M
          0.0,
351
4.23M
          0.0,
352
4.23M
          -0.17437602599651067f,
353
4.23M
          0.0702790691196284f,
354
4.23M
          0.0,
355
4.23M
          -0.2921026642334881f,
356
4.23M
          0.3623817333531167f,
357
4.23M
          0.0,
358
4.23M
          -0.1743760259965108f,
359
4.23M
          0.36238173335311646f,
360
4.23M
          0.29210266423348785f,
361
4.23M
          -0.4326608024727445f,
362
4.23M
          0.07027906911962818f,
363
4.23M
          0.0,
364
4.23M
          -0.4326608024727457f,
365
4.23M
          0.34875205199302267f,
366
4.23M
      },
367
4.23M
      {
368
4.23M
          0.0,
369
4.23M
          0.0,
370
4.23M
          0.11354987314994337f,
371
4.23M
          -0.07417504595810355f,
372
4.23M
          0.0,
373
4.23M
          0.19402893032594343f,
374
4.23M
          -0.435190496523228f,
375
4.23M
          0.21918684838857466f,
376
4.23M
          0.11354987314994257f,
377
4.23M
          -0.4351904965232251f,
378
4.23M
          0.5550443808910661f,
379
4.23M
          -0.25468277124066463f,
380
4.23M
          -0.07417504595810233f,
381
4.23M
          0.2191868483885728f,
382
4.23M
          -0.25468277124066413f,
383
4.23M
          0.1135498731499429f,
384
4.23M
      },
385
4.23M
  };
386
387
4.23M
  const HWY_CAPPED(float, 16) d;
388
12.7M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
8.47M
    auto pixel = Zero(d);
390
144M
    for (size_t j = 0; j < 16; j++) {
391
135M
      auto cf = Set(d, coeffs[j]);
392
135M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
135M
      pixel = MulAdd(cf, basis, pixel);
394
135M
    }
395
8.47M
    Store(pixel, d, pixels + i);
396
8.47M
  }
397
4.23M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
137k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
137k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
137k
      {
98
137k
          0.25,
99
137k
          0.25,
100
137k
          0.25,
101
137k
          0.25,
102
137k
          0.25,
103
137k
          0.25,
104
137k
          0.25,
105
137k
          0.25,
106
137k
          0.25,
107
137k
          0.25,
108
137k
          0.25,
109
137k
          0.25,
110
137k
          0.25,
111
137k
          0.25,
112
137k
          0.25,
113
137k
          0.25,
114
137k
      },
115
137k
      {
116
137k
          0.876902929799142f,
117
137k
          0.2206518106944235f,
118
137k
          -0.10140050393753763f,
119
137k
          -0.1014005039375375f,
120
137k
          0.2206518106944236f,
121
137k
          -0.10140050393753777f,
122
137k
          -0.10140050393753772f,
123
137k
          -0.10140050393753763f,
124
137k
          -0.10140050393753758f,
125
137k
          -0.10140050393753769f,
126
137k
          -0.1014005039375375f,
127
137k
          -0.10140050393753768f,
128
137k
          -0.10140050393753768f,
129
137k
          -0.10140050393753759f,
130
137k
          -0.10140050393753763f,
131
137k
          -0.10140050393753741f,
132
137k
      },
133
137k
      {
134
137k
          0.0,
135
137k
          0.0,
136
137k
          0.40670075830260755f,
137
137k
          0.44444816619734445f,
138
137k
          0.0,
139
137k
          0.0,
140
137k
          0.19574399372042936f,
141
137k
          0.2929100136981264f,
142
137k
          -0.40670075830260716f,
143
137k
          -0.19574399372042872f,
144
137k
          0.0,
145
137k
          0.11379074460448091f,
146
137k
          -0.44444816619734384f,
147
137k
          -0.29291001369812636f,
148
137k
          -0.1137907446044814f,
149
137k
          0.0,
150
137k
      },
151
137k
      {
152
137k
          0.0,
153
137k
          0.0,
154
137k
          -0.21255748058288748f,
155
137k
          0.3085497062849767f,
156
137k
          0.0,
157
137k
          0.4706702258572536f,
158
137k
          -0.1621205195722993f,
159
137k
          0.0,
160
137k
          -0.21255748058287047f,
161
137k
          -0.16212051957228327f,
162
137k
          -0.47067022585725277f,
163
137k
          -0.1464291867126764f,
164
137k
          0.3085497062849487f,
165
137k
          0.0,
166
137k
          -0.14642918671266536f,
167
137k
          0.4251149611657548f,
168
137k
      },
169
137k
      {
170
137k
          0.0,
171
137k
          -0.7071067811865474f,
172
137k
          0.0,
173
137k
          0.0,
174
137k
          0.7071067811865476f,
175
137k
          0.0,
176
137k
          0.0,
177
137k
          0.0,
178
137k
          0.0,
179
137k
          0.0,
180
137k
          0.0,
181
137k
          0.0,
182
137k
          0.0,
183
137k
          0.0,
184
137k
          0.0,
185
137k
          0.0,
186
137k
      },
187
137k
      {
188
137k
          -0.4105377591765233f,
189
137k
          0.6235485373547691f,
190
137k
          -0.06435071657946274f,
191
137k
          -0.06435071657946266f,
192
137k
          0.6235485373547694f,
193
137k
          -0.06435071657946284f,
194
137k
          -0.0643507165794628f,
195
137k
          -0.06435071657946274f,
196
137k
          -0.06435071657946272f,
197
137k
          -0.06435071657946279f,
198
137k
          -0.06435071657946266f,
199
137k
          -0.06435071657946277f,
200
137k
          -0.06435071657946277f,
201
137k
          -0.06435071657946273f,
202
137k
          -0.06435071657946274f,
203
137k
          -0.0643507165794626f,
204
137k
      },
205
137k
      {
206
137k
          0.0,
207
137k
          0.0,
208
137k
          -0.4517556589999482f,
209
137k
          0.15854503551840063f,
210
137k
          0.0,
211
137k
          -0.04038515160822202f,
212
137k
          0.0074182263792423875f,
213
137k
          0.39351034269210167f,
214
137k
          -0.45175565899994635f,
215
137k
          0.007418226379244351f,
216
137k
          0.1107416575309343f,
217
137k
          0.08298163094882051f,
218
137k
          0.15854503551839705f,
219
137k
          0.3935103426921022f,
220
137k
          0.0829816309488214f,
221
137k
          -0.45175565899994796f,
222
137k
      },
223
137k
      {
224
137k
          0.0,
225
137k
          0.0,
226
137k
          -0.304684750724869f,
227
137k
          0.5112616136591823f,
228
137k
          0.0,
229
137k
          0.0,
230
137k
          -0.290480129728998f,
231
137k
          -0.06578701549142804f,
232
137k
          0.304684750724884f,
233
137k
          0.2904801297290076f,
234
137k
          0.0,
235
137k
          -0.23889773523344604f,
236
137k
          -0.5112616136592012f,
237
137k
          0.06578701549142545f,
238
137k
          0.23889773523345467f,
239
137k
          0.0,
240
137k
      },
241
137k
      {
242
137k
          0.0,
243
137k
          0.0,
244
137k
          0.3017929516615495f,
245
137k
          0.25792362796341184f,
246
137k
          0.0,
247
137k
          0.16272340142866204f,
248
137k
          0.09520022653475037f,
249
137k
          0.0,
250
137k
          0.3017929516615503f,
251
137k
          0.09520022653475055f,
252
137k
          -0.16272340142866173f,
253
137k
          -0.35312385449816297f,
254
137k
          0.25792362796341295f,
255
137k
          0.0,
256
137k
          -0.3531238544981624f,
257
137k
          -0.6035859033230976f,
258
137k
      },
259
137k
      {
260
137k
          0.0,
261
137k
          0.0,
262
137k
          0.40824829046386274f,
263
137k
          0.0,
264
137k
          0.0,
265
137k
          0.0,
266
137k
          0.0,
267
137k
          -0.4082482904638628f,
268
137k
          -0.4082482904638635f,
269
137k
          0.0,
270
137k
          0.0,
271
137k
          -0.40824829046386296f,
272
137k
          0.0,
273
137k
          0.4082482904638634f,
274
137k
          0.408248290463863f,
275
137k
          0.0,
276
137k
      },
277
137k
      {
278
137k
          0.0,
279
137k
          0.0,
280
137k
          0.1747866975480809f,
281
137k
          0.0812611176717539f,
282
137k
          0.0,
283
137k
          0.0,
284
137k
          -0.3675398009862027f,
285
137k
          -0.307882213957909f,
286
137k
          -0.17478669754808135f,
287
137k
          0.3675398009862011f,
288
137k
          0.0,
289
137k
          0.4826689115059883f,
290
137k
          -0.08126111767175039f,
291
137k
          0.30788221395790305f,
292
137k
          -0.48266891150598584f,
293
137k
          0.0,
294
137k
      },
295
137k
      {
296
137k
          0.0,
297
137k
          0.0,
298
137k
          -0.21105601049335784f,
299
137k
          0.18567180916109802f,
300
137k
          0.0,
301
137k
          0.0,
302
137k
          0.49215859013738733f,
303
137k
          -0.38525013709251915f,
304
137k
          0.21105601049335806f,
305
137k
          -0.49215859013738905f,
306
137k
          0.0,
307
137k
          0.17419412659916217f,
308
137k
          -0.18567180916109904f,
309
137k
          0.3852501370925211f,
310
137k
          -0.1741941265991621f,
311
137k
          0.0,
312
137k
      },
313
137k
      {
314
137k
          0.0,
315
137k
          0.0,
316
137k
          -0.14266084808807264f,
317
137k
          -0.3416446842253372f,
318
137k
          0.0,
319
137k
          0.7367497537172237f,
320
137k
          0.24627107722075148f,
321
137k
          -0.08574019035519306f,
322
137k
          -0.14266084808807344f,
323
137k
          0.24627107722075137f,
324
137k
          0.14883399227113567f,
325
137k
          -0.04768680350229251f,
326
137k
          -0.3416446842253373f,
327
137k
          -0.08574019035519267f,
328
137k
          -0.047686803502292804f,
329
137k
          -0.14266084808807242f,
330
137k
      },
331
137k
      {
332
137k
          0.0,
333
137k
          0.0,
334
137k
          -0.13813540350758585f,
335
137k
          0.3302282550303788f,
336
137k
          0.0,
337
137k
          0.08755115000587084f,
338
137k
          -0.07946706605909573f,
339
137k
          -0.4613374887461511f,
340
137k
          -0.13813540350758294f,
341
137k
          -0.07946706605910261f,
342
137k
          0.49724647109535086f,
343
137k
          0.12538059448563663f,
344
137k
          0.3302282550303805f,
345
137k
          -0.4613374887461554f,
346
137k
          0.12538059448564315f,
347
137k
          -0.13813540350758452f,
348
137k
      },
349
137k
      {
350
137k
          0.0,
351
137k
          0.0,
352
137k
          -0.17437602599651067f,
353
137k
          0.0702790691196284f,
354
137k
          0.0,
355
137k
          -0.2921026642334881f,
356
137k
          0.3623817333531167f,
357
137k
          0.0,
358
137k
          -0.1743760259965108f,
359
137k
          0.36238173335311646f,
360
137k
          0.29210266423348785f,
361
137k
          -0.4326608024727445f,
362
137k
          0.07027906911962818f,
363
137k
          0.0,
364
137k
          -0.4326608024727457f,
365
137k
          0.34875205199302267f,
366
137k
      },
367
137k
      {
368
137k
          0.0,
369
137k
          0.0,
370
137k
          0.11354987314994337f,
371
137k
          -0.07417504595810355f,
372
137k
          0.0,
373
137k
          0.19402893032594343f,
374
137k
          -0.435190496523228f,
375
137k
          0.21918684838857466f,
376
137k
          0.11354987314994257f,
377
137k
          -0.4351904965232251f,
378
137k
          0.5550443808910661f,
379
137k
          -0.25468277124066463f,
380
137k
          -0.07417504595810233f,
381
137k
          0.2191868483885728f,
382
137k
          -0.25468277124066413f,
383
137k
          0.1135498731499429f,
384
137k
      },
385
137k
  };
386
387
137k
  const HWY_CAPPED(float, 16) d;
388
411k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
274k
    auto pixel = Zero(d);
390
4.65M
    for (size_t j = 0; j < 16; j++) {
391
4.38M
      auto cf = Set(d, coeffs[j]);
392
4.38M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
4.38M
      pixel = MulAdd(cf, basis, pixel);
394
4.38M
    }
395
274k
    Store(pixel, d, pixels + i);
396
274k
  }
397
137k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
// Converts one AFV-coded coefficient block into pixels.
//
// `coefficients` is read as an 8-wide row-major grid (indices 0..63) and is
// split into three interleaved subsets, each inverted separately:
//   1. even rows / even columns -> AFVIDCT4x4, copied into the 4x4 pixel
//      quadrant at row afv_y * 4, column afv_x * 4 (mirrored per axis when
//      the corresponding afv_* is 1);
//   2. even rows / odd columns  -> ComputeScaledIDCT<4, 4>, targeted at the
//      horizontally neighbouring quadrant;
//   3. odd rows / all columns   -> ComputeScaledIDCT<4, 8>, targeted at the
//      other group of four rows.
// The first coefficient of each subset is replaced by a value derived from
// coefficients[0], coefficients[1] and coefficients[8].
//
// afv_kind: bit 0 becomes afv_x and afv_kind / 2 becomes afv_y.
// pixels / pixels_stride: output buffer and its row stride in floats.
// NOTE(review): the exact extent written by steps 2 and 3 depends on
// DCTTo / ComputeScaledIDCT, which are defined elsewhere - presumably 4x4 and
// 4x8 pixels respectively; confirm.
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
4.37M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
4.37M
  HWY_ALIGN float scratch_space[4 * 8 * 4];  // shared by both ComputeScaledIDCT calls below
403
4.37M
  size_t afv_x = afv_kind & 1;  // horizontal selector for the AFV quadrant
404
4.37M
  size_t afv_y = afv_kind / 2;  // vertical selector for the AFV quadrant
405
4.37M
  float dcs[3] = {};  // replacement first coefficients for the three sub-transforms
406
4.37M
  float block00 = coefficients[0];
407
4.37M
  float block01 = coefficients[1];
408
4.37M
  float block10 = coefficients[8];
409
4.37M
  dcs[0] = (block00 + block10 + block01) * 4.0f;  // used by the AFV 4x4 part
410
4.37M
  dcs[1] = (block00 + block10 - block01);  // used by the 4x4 IDCT part
411
4.37M
  dcs[2] = block00 - block10;  // used by the 4x8 IDCT part
412
  // IAFV: (even, even) positions, i.e. even rows and even columns of the
  // 8-wide coefficient grid, with entry 0 taken from dcs[0] instead.
413
4.37M
  HWY_ALIGN float coeff[4 * 4];
414
4.37M
  coeff[0] = dcs[0];
415
21.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
87.5M
    for (size_t ix = 0; ix < 4; ix++) {
417
70.0M
      if (ix == 0 && iy == 0) continue;  // slot 0 already holds dcs[0]
418
65.6M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
65.6M
    }
420
17.5M
  }
421
4.37M
  HWY_ALIGN float block[4 * 8];  // sized for the 4x8 case; reused by all three parts
422
4.37M
  AFVIDCT4x4(coeff, block);
423
21.8M
  for (size_t iy = 0; iy < 4; iy++) {
424
87.5M
    for (size_t ix = 0; ix < 4; ix++) {
425
70.0M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
70.0M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];  // mirror rows/cols when afv_y/afv_x is 1
427
70.0M
    }
428
17.5M
  }
429
  // IDCT4x4 in (odd, even) positions: odd columns of even rows, with entry 0
  // taken from dcs[1]. Output goes to the same row band as the AFV quadrant,
  // at column 0 when afv_x == 1 and column 4 otherwise.
430
4.37M
  block[0] = dcs[1];
431
21.8M
  for (size_t iy = 0; iy < 4; iy++) {
432
87.5M
    for (size_t ix = 0; ix < 4; ix++) {
433
70.0M
      if (ix == 0 && iy == 0) continue;  // slot 0 already holds dcs[1]
434
65.6M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
65.6M
    }
436
17.5M
  }
437
4.37M
  ComputeScaledIDCT<4, 4>()(
438
4.37M
      block,
439
4.37M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
4.37M
            pixels_stride),
441
4.37M
      scratch_space);
442
  // IDCT4x8: all eight columns of the odd rows, with entry 0 taken from
  // dcs[2]. Output starts at row 0 when afv_y == 1 and row 4 otherwise.
443
4.37M
  block[0] = dcs[2];
444
21.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
157M
    for (size_t ix = 0; ix < 8; ix++) {
446
140M
      if (ix == 0 && iy == 0) continue;  // slot 0 already holds dcs[2]
447
135M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
135M
    }
449
17.5M
  }
450
4.37M
  ComputeScaledIDCT<4, 8>()(
451
4.37M
      block,
452
4.37M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
4.37M
      scratch_space);
454
4.37M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.05M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.05M
  size_t afv_x = afv_kind & 1;
404
1.05M
  size_t afv_y = afv_kind / 2;
405
1.05M
  float dcs[3] = {};
406
1.05M
  float block00 = coefficients[0];
407
1.05M
  float block01 = coefficients[1];
408
1.05M
  float block10 = coefficients[8];
409
1.05M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.05M
  dcs[1] = (block00 + block10 - block01);
411
1.05M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.05M
  HWY_ALIGN float coeff[4 * 4];
414
1.05M
  coeff[0] = dcs[0];
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      if (ix == 0 && iy == 0) continue;
418
15.8M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
15.8M
    }
420
4.23M
  }
421
1.05M
  HWY_ALIGN float block[4 * 8];
422
1.05M
  AFVIDCT4x4(coeff, block);
423
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
424
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
425
16.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
16.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
16.9M
    }
428
4.23M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.05M
  block[0] = dcs[1];
431
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
432
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
433
16.9M
      if (ix == 0 && iy == 0) continue;
434
15.8M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
15.8M
    }
436
4.23M
  }
437
1.05M
  ComputeScaledIDCT<4, 4>()(
438
1.05M
      block,
439
1.05M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.05M
            pixels_stride),
441
1.05M
      scratch_space);
442
  // IDCT4x8.
443
1.05M
  block[0] = dcs[2];
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      if (ix == 0 && iy == 0) continue;
447
32.8M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
32.8M
    }
449
4.23M
  }
450
1.05M
  ComputeScaledIDCT<4, 8>()(
451
1.05M
      block,
452
1.05M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.05M
      scratch_space);
454
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.05M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.05M
  size_t afv_x = afv_kind & 1;
404
1.05M
  size_t afv_y = afv_kind / 2;
405
1.05M
  float dcs[3] = {};
406
1.05M
  float block00 = coefficients[0];
407
1.05M
  float block01 = coefficients[1];
408
1.05M
  float block10 = coefficients[8];
409
1.05M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.05M
  dcs[1] = (block00 + block10 - block01);
411
1.05M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.05M
  HWY_ALIGN float coeff[4 * 4];
414
1.05M
  coeff[0] = dcs[0];
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      if (ix == 0 && iy == 0) continue;
418
15.8M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
15.8M
    }
420
4.23M
  }
421
1.05M
  HWY_ALIGN float block[4 * 8];
422
1.05M
  AFVIDCT4x4(coeff, block);
423
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
424
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
425
16.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
16.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
16.9M
    }
428
4.23M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.05M
  block[0] = dcs[1];
431
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
432
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
433
16.9M
      if (ix == 0 && iy == 0) continue;
434
15.8M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
15.8M
    }
436
4.23M
  }
437
1.05M
  ComputeScaledIDCT<4, 4>()(
438
1.05M
      block,
439
1.05M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.05M
            pixels_stride),
441
1.05M
      scratch_space);
442
  // IDCT4x8.
443
1.05M
  block[0] = dcs[2];
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      if (ix == 0 && iy == 0) continue;
447
32.8M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
32.8M
    }
449
4.23M
  }
450
1.05M
  ComputeScaledIDCT<4, 8>()(
451
1.05M
      block,
452
1.05M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.05M
      scratch_space);
454
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.05M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.05M
  size_t afv_x = afv_kind & 1;
404
1.05M
  size_t afv_y = afv_kind / 2;
405
1.05M
  float dcs[3] = {};
406
1.05M
  float block00 = coefficients[0];
407
1.05M
  float block01 = coefficients[1];
408
1.05M
  float block10 = coefficients[8];
409
1.05M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.05M
  dcs[1] = (block00 + block10 - block01);
411
1.05M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.05M
  HWY_ALIGN float coeff[4 * 4];
414
1.05M
  coeff[0] = dcs[0];
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      if (ix == 0 && iy == 0) continue;
418
15.8M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
15.8M
    }
420
4.23M
  }
421
1.05M
  HWY_ALIGN float block[4 * 8];
422
1.05M
  AFVIDCT4x4(coeff, block);
423
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
424
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
425
16.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
16.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
16.9M
    }
428
4.23M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.05M
  block[0] = dcs[1];
431
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
432
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
433
16.9M
      if (ix == 0 && iy == 0) continue;
434
15.8M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
15.8M
    }
436
4.23M
  }
437
1.05M
  ComputeScaledIDCT<4, 4>()(
438
1.05M
      block,
439
1.05M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.05M
            pixels_stride),
441
1.05M
      scratch_space);
442
  // IDCT4x8.
443
1.05M
  block[0] = dcs[2];
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      if (ix == 0 && iy == 0) continue;
447
32.8M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
32.8M
    }
449
4.23M
  }
450
1.05M
  ComputeScaledIDCT<4, 8>()(
451
1.05M
      block,
452
1.05M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.05M
      scratch_space);
454
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.05M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.05M
  size_t afv_x = afv_kind & 1;
404
1.05M
  size_t afv_y = afv_kind / 2;
405
1.05M
  float dcs[3] = {};
406
1.05M
  float block00 = coefficients[0];
407
1.05M
  float block01 = coefficients[1];
408
1.05M
  float block10 = coefficients[8];
409
1.05M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.05M
  dcs[1] = (block00 + block10 - block01);
411
1.05M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.05M
  HWY_ALIGN float coeff[4 * 4];
414
1.05M
  coeff[0] = dcs[0];
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      if (ix == 0 && iy == 0) continue;
418
15.8M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
15.8M
    }
420
4.23M
  }
421
1.05M
  HWY_ALIGN float block[4 * 8];
422
1.05M
  AFVIDCT4x4(coeff, block);
423
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
424
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
425
16.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
16.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
16.9M
    }
428
4.23M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.05M
  block[0] = dcs[1];
431
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
432
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
433
16.9M
      if (ix == 0 && iy == 0) continue;
434
15.8M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
15.8M
    }
436
4.23M
  }
437
1.05M
  ComputeScaledIDCT<4, 4>()(
438
1.05M
      block,
439
1.05M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.05M
            pixels_stride),
441
1.05M
      scratch_space);
442
  // IDCT4x8.
443
1.05M
  block[0] = dcs[2];
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      if (ix == 0 && iy == 0) continue;
447
32.8M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
32.8M
    }
449
4.23M
  }
450
1.05M
  ComputeScaledIDCT<4, 8>()(
451
1.05M
      block,
452
1.05M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.05M
      scratch_space);
454
1.05M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
37.5k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
37.5k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
37.5k
  size_t afv_x = afv_kind & 1;
404
37.5k
  size_t afv_y = afv_kind / 2;
405
37.5k
  float dcs[3] = {};
406
37.5k
  float block00 = coefficients[0];
407
37.5k
  float block01 = coefficients[1];
408
37.5k
  float block10 = coefficients[8];
409
37.5k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
37.5k
  dcs[1] = (block00 + block10 - block01);
411
37.5k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
37.5k
  HWY_ALIGN float coeff[4 * 4];
414
37.5k
  coeff[0] = dcs[0];
415
187k
  for (size_t iy = 0; iy < 4; iy++) {
416
750k
    for (size_t ix = 0; ix < 4; ix++) {
417
600k
      if (ix == 0 && iy == 0) continue;
418
563k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
563k
    }
420
150k
  }
421
37.5k
  HWY_ALIGN float block[4 * 8];
422
37.5k
  AFVIDCT4x4(coeff, block);
423
187k
  for (size_t iy = 0; iy < 4; iy++) {
424
750k
    for (size_t ix = 0; ix < 4; ix++) {
425
600k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
600k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
600k
    }
428
150k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
37.5k
  block[0] = dcs[1];
431
187k
  for (size_t iy = 0; iy < 4; iy++) {
432
750k
    for (size_t ix = 0; ix < 4; ix++) {
433
600k
      if (ix == 0 && iy == 0) continue;
434
563k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
563k
    }
436
150k
  }
437
37.5k
  ComputeScaledIDCT<4, 4>()(
438
37.5k
      block,
439
37.5k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
37.5k
            pixels_stride),
441
37.5k
      scratch_space);
442
  // IDCT4x8.
443
37.5k
  block[0] = dcs[2];
444
187k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.35M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.20M
      if (ix == 0 && iy == 0) continue;
447
1.16M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.16M
    }
449
150k
  }
450
37.5k
  ComputeScaledIDCT<4, 8>()(
451
37.5k
      block,
452
37.5k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
37.5k
      scratch_space);
454
37.5k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
24.2k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
24.2k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
24.2k
  size_t afv_x = afv_kind & 1;
404
24.2k
  size_t afv_y = afv_kind / 2;
405
24.2k
  float dcs[3] = {};
406
24.2k
  float block00 = coefficients[0];
407
24.2k
  float block01 = coefficients[1];
408
24.2k
  float block10 = coefficients[8];
409
24.2k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
24.2k
  dcs[1] = (block00 + block10 - block01);
411
24.2k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
24.2k
  HWY_ALIGN float coeff[4 * 4];
414
24.2k
  coeff[0] = dcs[0];
415
121k
  for (size_t iy = 0; iy < 4; iy++) {
416
485k
    for (size_t ix = 0; ix < 4; ix++) {
417
388k
      if (ix == 0 && iy == 0) continue;
418
364k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
364k
    }
420
97.1k
  }
421
24.2k
  HWY_ALIGN float block[4 * 8];
422
24.2k
  AFVIDCT4x4(coeff, block);
423
121k
  for (size_t iy = 0; iy < 4; iy++) {
424
485k
    for (size_t ix = 0; ix < 4; ix++) {
425
388k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
388k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
388k
    }
428
97.1k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
24.2k
  block[0] = dcs[1];
431
121k
  for (size_t iy = 0; iy < 4; iy++) {
432
485k
    for (size_t ix = 0; ix < 4; ix++) {
433
388k
      if (ix == 0 && iy == 0) continue;
434
364k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
364k
    }
436
97.1k
  }
437
24.2k
  ComputeScaledIDCT<4, 4>()(
438
24.2k
      block,
439
24.2k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
24.2k
            pixels_stride),
441
24.2k
      scratch_space);
442
  // IDCT4x8.
443
24.2k
  block[0] = dcs[2];
444
121k
  for (size_t iy = 0; iy < 4; iy++) {
445
874k
    for (size_t ix = 0; ix < 8; ix++) {
446
777k
      if (ix == 0 && iy == 0) continue;
447
753k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
753k
    }
449
97.1k
  }
450
24.2k
  ComputeScaledIDCT<4, 8>()(
451
24.2k
      block,
452
24.2k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
24.2k
      scratch_space);
454
24.2k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
30.4k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
30.4k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
30.4k
  size_t afv_x = afv_kind & 1;
404
30.4k
  size_t afv_y = afv_kind / 2;
405
30.4k
  float dcs[3] = {};
406
30.4k
  float block00 = coefficients[0];
407
30.4k
  float block01 = coefficients[1];
408
30.4k
  float block10 = coefficients[8];
409
30.4k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
30.4k
  dcs[1] = (block00 + block10 - block01);
411
30.4k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
30.4k
  HWY_ALIGN float coeff[4 * 4];
414
30.4k
  coeff[0] = dcs[0];
415
152k
  for (size_t iy = 0; iy < 4; iy++) {
416
609k
    for (size_t ix = 0; ix < 4; ix++) {
417
487k
      if (ix == 0 && iy == 0) continue;
418
457k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
457k
    }
420
121k
  }
421
30.4k
  HWY_ALIGN float block[4 * 8];
422
30.4k
  AFVIDCT4x4(coeff, block);
423
152k
  for (size_t iy = 0; iy < 4; iy++) {
424
609k
    for (size_t ix = 0; ix < 4; ix++) {
425
487k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
487k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
487k
    }
428
121k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
30.4k
  block[0] = dcs[1];
431
152k
  for (size_t iy = 0; iy < 4; iy++) {
432
609k
    for (size_t ix = 0; ix < 4; ix++) {
433
487k
      if (ix == 0 && iy == 0) continue;
434
457k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
457k
    }
436
121k
  }
437
30.4k
  ComputeScaledIDCT<4, 4>()(
438
30.4k
      block,
439
30.4k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
30.4k
            pixels_stride),
441
30.4k
      scratch_space);
442
  // IDCT4x8.
443
30.4k
  block[0] = dcs[2];
444
152k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.09M
    for (size_t ix = 0; ix < 8; ix++) {
446
975k
      if (ix == 0 && iy == 0) continue;
447
944k
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
944k
    }
449
121k
  }
450
30.4k
  ComputeScaledIDCT<4, 8>()(
451
30.4k
      block,
452
30.4k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
30.4k
      scratch_space);
454
30.4k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
44.7k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
44.7k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
44.7k
  size_t afv_x = afv_kind & 1;
404
44.7k
  size_t afv_y = afv_kind / 2;
405
44.7k
  float dcs[3] = {};
406
44.7k
  float block00 = coefficients[0];
407
44.7k
  float block01 = coefficients[1];
408
44.7k
  float block10 = coefficients[8];
409
44.7k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
44.7k
  dcs[1] = (block00 + block10 - block01);
411
44.7k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
44.7k
  HWY_ALIGN float coeff[4 * 4];
414
44.7k
  coeff[0] = dcs[0];
415
223k
  for (size_t iy = 0; iy < 4; iy++) {
416
894k
    for (size_t ix = 0; ix < 4; ix++) {
417
715k
      if (ix == 0 && iy == 0) continue;
418
670k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
670k
    }
420
178k
  }
421
44.7k
  HWY_ALIGN float block[4 * 8];
422
44.7k
  AFVIDCT4x4(coeff, block);
423
223k
  for (size_t iy = 0; iy < 4; iy++) {
424
894k
    for (size_t ix = 0; ix < 4; ix++) {
425
715k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
715k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
715k
    }
428
178k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
44.7k
  block[0] = dcs[1];
431
223k
  for (size_t iy = 0; iy < 4; iy++) {
432
894k
    for (size_t ix = 0; ix < 4; ix++) {
433
715k
      if (ix == 0 && iy == 0) continue;
434
670k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
670k
    }
436
178k
  }
437
44.7k
  ComputeScaledIDCT<4, 4>()(
438
44.7k
      block,
439
44.7k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
44.7k
            pixels_stride),
441
44.7k
      scratch_space);
442
  // IDCT4x8.
443
44.7k
  block[0] = dcs[2];
444
223k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.60M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.43M
      if (ix == 0 && iy == 0) continue;
447
1.38M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.38M
    }
449
178k
  }
450
44.7k
  ComputeScaledIDCT<4, 8>()(
451
44.7k
      block,
452
44.7k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
44.7k
      scratch_space);
454
44.7k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
15.4M
                                        float* scratch_space) {
461
15.4M
  using Type = AcStrategyType;
462
15.4M
  switch (strategy) {
463
1.64M
    case Type::IDENTITY: {
464
1.64M
      float dcs[4] = {};
465
1.64M
      float block00 = coefficients[0];
466
1.64M
      float block01 = coefficients[1];
467
1.64M
      float block10 = coefficients[8];
468
1.64M
      float block11 = coefficients[9];
469
1.64M
      dcs[0] = block00 + block01 + block10 + block11;
470
1.64M
      dcs[1] = block00 + block01 - block10 - block11;
471
1.64M
      dcs[2] = block00 - block01 + block10 - block11;
472
1.64M
      dcs[3] = block00 - block01 - block10 + block11;
473
4.94M
      for (size_t y = 0; y < 2; y++) {
474
9.88M
        for (size_t x = 0; x < 2; x++) {
475
6.59M
          float block_dc = dcs[y * 2 + x];
476
6.59M
          float residual_sum = 0;
477
32.9M
          for (size_t iy = 0; iy < 4; iy++) {
478
131M
            for (size_t ix = 0; ix < 4; ix++) {
479
105M
              if (ix == 0 && iy == 0) continue;
480
98.8M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
98.8M
            }
482
26.3M
          }
483
6.59M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
6.59M
              block_dc - residual_sum * (1.0f / 16);
485
32.9M
          for (size_t iy = 0; iy < 4; iy++) {
486
131M
            for (size_t ix = 0; ix < 4; ix++) {
487
105M
              if (ix == 1 && iy == 1) continue;
488
98.8M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
98.8M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
98.8M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
98.8M
            }
492
26.3M
          }
493
6.59M
          pixels[y * 4 * pixels_stride + x * 4] =
494
6.59M
              coefficients[(y + 2) * 8 + x + 2] +
495
6.59M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
6.59M
        }
497
3.29M
      }
498
1.64M
      break;
499
0
    }
500
1.16M
    case Type::DCT8X4: {
501
1.16M
      float dcs[2] = {};
502
1.16M
      float block0 = coefficients[0];
503
1.16M
      float block1 = coefficients[8];
504
1.16M
      dcs[0] = block0 + block1;
505
1.16M
      dcs[1] = block0 - block1;
506
3.49M
      for (size_t x = 0; x < 2; x++) {
507
2.32M
        HWY_ALIGN float block[4 * 8];
508
2.32M
        block[0] = dcs[x];
509
11.6M
        for (size_t iy = 0; iy < 4; iy++) {
510
83.8M
          for (size_t ix = 0; ix < 8; ix++) {
511
74.5M
            if (ix == 0 && iy == 0) continue;
512
72.1M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
72.1M
          }
514
9.31M
        }
515
2.32M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.32M
                                  scratch_space);
517
2.32M
      }
518
1.16M
      break;
519
0
    }
520
1.11M
    case Type::DCT4X8: {
521
1.11M
      float dcs[2] = {};
522
1.11M
      float block0 = coefficients[0];
523
1.11M
      float block1 = coefficients[8];
524
1.11M
      dcs[0] = block0 + block1;
525
1.11M
      dcs[1] = block0 - block1;
526
3.34M
      for (size_t y = 0; y < 2; y++) {
527
2.23M
        HWY_ALIGN float block[4 * 8];
528
2.23M
        block[0] = dcs[y];
529
11.1M
        for (size_t iy = 0; iy < 4; iy++) {
530
80.3M
          for (size_t ix = 0; ix < 8; ix++) {
531
71.3M
            if (ix == 0 && iy == 0) continue;
532
69.1M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
69.1M
          }
534
8.92M
        }
535
2.23M
        ComputeScaledIDCT<4, 8>()(
536
2.23M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.23M
            scratch_space);
538
2.23M
      }
539
1.11M
      break;
540
0
    }
541
1.06M
    case Type::DCT4X4: {
542
1.06M
      float dcs[4] = {};
543
1.06M
      float block00 = coefficients[0];
544
1.06M
      float block01 = coefficients[1];
545
1.06M
      float block10 = coefficients[8];
546
1.06M
      float block11 = coefficients[9];
547
1.06M
      dcs[0] = block00 + block01 + block10 + block11;
548
1.06M
      dcs[1] = block00 + block01 - block10 - block11;
549
1.06M
      dcs[2] = block00 - block01 + block10 - block11;
550
1.06M
      dcs[3] = block00 - block01 - block10 + block11;
551
3.18M
      for (size_t y = 0; y < 2; y++) {
552
6.36M
        for (size_t x = 0; x < 2; x++) {
553
4.24M
          HWY_ALIGN float block[4 * 4];
554
4.24M
          block[0] = dcs[y * 2 + x];
555
21.2M
          for (size_t iy = 0; iy < 4; iy++) {
556
84.8M
            for (size_t ix = 0; ix < 4; ix++) {
557
67.8M
              if (ix == 0 && iy == 0) continue;
558
63.6M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
63.6M
            }
560
16.9M
          }
561
4.24M
          ComputeScaledIDCT<4, 4>()(
562
4.24M
              block,
563
4.24M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
4.24M
              scratch_space);
565
4.24M
        }
566
2.12M
      }
567
1.06M
      break;
568
0
    }
569
1.47M
    case Type::DCT2X2: {
570
1.47M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
1.47M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
1.47M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
1.47M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
1.47M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
13.2M
      for (size_t y = 0; y < kBlockDim; y++) {
576
105M
        for (size_t x = 0; x < kBlockDim; x++) {
577
94.0M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
94.0M
        }
579
11.7M
      }
580
1.47M
      break;
581
0
    }
582
470k
    case Type::DCT16X16: {
583
470k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
470k
                                  scratch_space);
585
470k
      break;
586
0
    }
587
886k
    case Type::DCT16X8: {
588
886k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
886k
                                 scratch_space);
590
886k
      break;
591
0
    }
592
905k
    case Type::DCT8X16: {
593
905k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
905k
                                 scratch_space);
595
905k
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
184k
    case Type::DCT32X16: {
608
184k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
184k
                                  scratch_space);
610
184k
      break;
611
0
    }
612
189k
    case Type::DCT16X32: {
613
189k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
189k
                                  scratch_space);
615
189k
      break;
616
0
    }
617
133k
    case Type::DCT32X32: {
618
133k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
133k
                                  scratch_space);
620
133k
      break;
621
0
    }
622
1.76M
    case Type::DCT: {
623
1.76M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
1.76M
                                scratch_space);
625
1.76M
      break;
626
0
    }
627
1.09M
    case Type::AFV0: {
628
1.09M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
1.09M
      break;
630
0
    }
631
1.08M
    case Type::AFV1: {
632
1.08M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
1.08M
      break;
634
0
    }
635
1.09M
    case Type::AFV2: {
636
1.09M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
1.09M
      break;
638
0
    }
639
1.10M
    case Type::AFV3: {
640
1.10M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
1.10M
      break;
642
0
    }
643
58.1k
    case Type::DCT64X32: {
644
58.1k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
58.1k
                                  scratch_space);
646
58.1k
      break;
647
0
    }
648
42.2k
    case Type::DCT32X64: {
649
42.2k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
42.2k
                                  scratch_space);
651
42.2k
      break;
652
0
    }
653
18.8k
    case Type::DCT64X64: {
654
18.8k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
18.8k
                                  scratch_space);
656
18.8k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
15.4M
  }
689
15.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
13.2M
                                        float* scratch_space) {
461
13.2M
  using Type = AcStrategyType;
462
13.2M
  switch (strategy) {
463
1.05M
    case Type::IDENTITY: {
464
1.05M
      float dcs[4] = {};
465
1.05M
      float block00 = coefficients[0];
466
1.05M
      float block01 = coefficients[1];
467
1.05M
      float block10 = coefficients[8];
468
1.05M
      float block11 = coefficients[9];
469
1.05M
      dcs[0] = block00 + block01 + block10 + block11;
470
1.05M
      dcs[1] = block00 + block01 - block10 - block11;
471
1.05M
      dcs[2] = block00 - block01 + block10 - block11;
472
1.05M
      dcs[3] = block00 - block01 - block10 + block11;
473
3.17M
      for (size_t y = 0; y < 2; y++) {
474
6.35M
        for (size_t x = 0; x < 2; x++) {
475
4.23M
          float block_dc = dcs[y * 2 + x];
476
4.23M
          float residual_sum = 0;
477
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
478
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
479
67.8M
              if (ix == 0 && iy == 0) continue;
480
63.5M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
63.5M
            }
482
16.9M
          }
483
4.23M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
4.23M
              block_dc - residual_sum * (1.0f / 16);
485
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
486
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
487
67.8M
              if (ix == 1 && iy == 1) continue;
488
63.5M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
63.5M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
63.5M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
63.5M
            }
492
16.9M
          }
493
4.23M
          pixels[y * 4 * pixels_stride + x * 4] =
494
4.23M
              coefficients[(y + 2) * 8 + x + 2] +
495
4.23M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
4.23M
        }
497
2.11M
      }
498
1.05M
      break;
499
0
    }
500
1.05M
    case Type::DCT8X4: {
501
1.05M
      float dcs[2] = {};
502
1.05M
      float block0 = coefficients[0];
503
1.05M
      float block1 = coefficients[8];
504
1.05M
      dcs[0] = block0 + block1;
505
1.05M
      dcs[1] = block0 - block1;
506
3.17M
      for (size_t x = 0; x < 2; x++) {
507
2.11M
        HWY_ALIGN float block[4 * 8];
508
2.11M
        block[0] = dcs[x];
509
10.5M
        for (size_t iy = 0; iy < 4; iy++) {
510
76.2M
          for (size_t ix = 0; ix < 8; ix++) {
511
67.8M
            if (ix == 0 && iy == 0) continue;
512
65.6M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
65.6M
          }
514
8.47M
        }
515
2.11M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.11M
                                  scratch_space);
517
2.11M
      }
518
1.05M
      break;
519
0
    }
520
1.05M
    case Type::DCT4X8: {
521
1.05M
      float dcs[2] = {};
522
1.05M
      float block0 = coefficients[0];
523
1.05M
      float block1 = coefficients[8];
524
1.05M
      dcs[0] = block0 + block1;
525
1.05M
      dcs[1] = block0 - block1;
526
3.17M
      for (size_t y = 0; y < 2; y++) {
527
2.11M
        HWY_ALIGN float block[4 * 8];
528
2.11M
        block[0] = dcs[y];
529
10.5M
        for (size_t iy = 0; iy < 4; iy++) {
530
76.2M
          for (size_t ix = 0; ix < 8; ix++) {
531
67.8M
            if (ix == 0 && iy == 0) continue;
532
65.6M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
65.6M
          }
534
8.47M
        }
535
2.11M
        ComputeScaledIDCT<4, 8>()(
536
2.11M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.11M
            scratch_space);
538
2.11M
      }
539
1.05M
      break;
540
0
    }
541
1.05M
    case Type::DCT4X4: {
542
1.05M
      float dcs[4] = {};
543
1.05M
      float block00 = coefficients[0];
544
1.05M
      float block01 = coefficients[1];
545
1.05M
      float block10 = coefficients[8];
546
1.05M
      float block11 = coefficients[9];
547
1.05M
      dcs[0] = block00 + block01 + block10 + block11;
548
1.05M
      dcs[1] = block00 + block01 - block10 - block11;
549
1.05M
      dcs[2] = block00 - block01 + block10 - block11;
550
1.05M
      dcs[3] = block00 - block01 - block10 + block11;
551
3.17M
      for (size_t y = 0; y < 2; y++) {
552
6.35M
        for (size_t x = 0; x < 2; x++) {
553
4.23M
          HWY_ALIGN float block[4 * 4];
554
4.23M
          block[0] = dcs[y * 2 + x];
555
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
556
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
557
67.8M
              if (ix == 0 && iy == 0) continue;
558
63.5M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
63.5M
            }
560
16.9M
          }
561
4.23M
          ComputeScaledIDCT<4, 4>()(
562
4.23M
              block,
563
4.23M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
4.23M
              scratch_space);
565
4.23M
        }
566
2.11M
      }
567
1.05M
      break;
568
0
    }
569
1.05M
    case Type::DCT2X2: {
570
1.05M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
1.05M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
1.05M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
1.05M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
1.05M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
9.53M
      for (size_t y = 0; y < kBlockDim; y++) {
576
76.2M
        for (size_t x = 0; x < kBlockDim; x++) {
577
67.8M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
67.8M
        }
579
8.47M
      }
580
1.05M
      break;
581
0
    }
582
419k
    case Type::DCT16X16: {
583
419k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
419k
                                  scratch_space);
585
419k
      break;
586
0
    }
587
827k
    case Type::DCT16X8: {
588
827k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
827k
                                 scratch_space);
590
827k
      break;
591
0
    }
592
830k
    case Type::DCT8X16: {
593
830k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
830k
                                 scratch_space);
595
830k
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
164k
    case Type::DCT32X16: {
608
164k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
164k
                                  scratch_space);
610
164k
      break;
611
0
    }
612
166k
    case Type::DCT16X32: {
613
166k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
166k
                                  scratch_space);
615
166k
      break;
616
0
    }
617
86.2k
    case Type::DCT32X32: {
618
86.2k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
86.2k
                                  scratch_space);
620
86.2k
      break;
621
0
    }
622
1.05M
    case Type::DCT: {
623
1.05M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
1.05M
                                scratch_space);
625
1.05M
      break;
626
0
    }
627
1.05M
    case Type::AFV0: {
628
1.05M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
1.05M
      break;
630
0
    }
631
1.05M
    case Type::AFV1: {
632
1.05M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
1.05M
      break;
634
0
    }
635
1.05M
    case Type::AFV2: {
636
1.05M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
1.05M
      break;
638
0
    }
639
1.05M
    case Type::AFV3: {
640
1.05M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
1.05M
      break;
642
0
    }
643
57.5k
    case Type::DCT64X32: {
644
57.5k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
57.5k
                                  scratch_space);
646
57.5k
      break;
647
0
    }
648
42.0k
    case Type::DCT32X64: {
649
42.0k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
42.0k
                                  scratch_space);
651
42.0k
      break;
652
0
    }
653
15.0k
    case Type::DCT64X64: {
654
15.0k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
15.0k
                                  scratch_space);
656
15.0k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
13.2M
  }
689
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
2.27M
                                        float* scratch_space) {
461
2.27M
  using Type = AcStrategyType;
462
2.27M
  switch (strategy) {
463
588k
    case Type::IDENTITY: {
464
588k
      float dcs[4] = {};
465
588k
      float block00 = coefficients[0];
466
588k
      float block01 = coefficients[1];
467
588k
      float block10 = coefficients[8];
468
588k
      float block11 = coefficients[9];
469
588k
      dcs[0] = block00 + block01 + block10 + block11;
470
588k
      dcs[1] = block00 + block01 - block10 - block11;
471
588k
      dcs[2] = block00 - block01 + block10 - block11;
472
588k
      dcs[3] = block00 - block01 - block10 + block11;
473
1.76M
      for (size_t y = 0; y < 2; y++) {
474
3.53M
        for (size_t x = 0; x < 2; x++) {
475
2.35M
          float block_dc = dcs[y * 2 + x];
476
2.35M
          float residual_sum = 0;
477
11.7M
          for (size_t iy = 0; iy < 4; iy++) {
478
47.0M
            for (size_t ix = 0; ix < 4; ix++) {
479
37.6M
              if (ix == 0 && iy == 0) continue;
480
35.3M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
35.3M
            }
482
9.41M
          }
483
2.35M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
2.35M
              block_dc - residual_sum * (1.0f / 16);
485
11.7M
          for (size_t iy = 0; iy < 4; iy++) {
486
47.0M
            for (size_t ix = 0; ix < 4; ix++) {
487
37.6M
              if (ix == 1 && iy == 1) continue;
488
35.3M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
35.3M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
35.3M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
35.3M
            }
492
9.41M
          }
493
2.35M
          pixels[y * 4 * pixels_stride + x * 4] =
494
2.35M
              coefficients[(y + 2) * 8 + x + 2] +
495
2.35M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
2.35M
        }
497
1.17M
      }
498
588k
      break;
499
0
    }
500
104k
    case Type::DCT8X4: {
501
104k
      float dcs[2] = {};
502
104k
      float block0 = coefficients[0];
503
104k
      float block1 = coefficients[8];
504
104k
      dcs[0] = block0 + block1;
505
104k
      dcs[1] = block0 - block1;
506
313k
      for (size_t x = 0; x < 2; x++) {
507
209k
        HWY_ALIGN float block[4 * 8];
508
209k
        block[0] = dcs[x];
509
1.04M
        for (size_t iy = 0; iy < 4; iy++) {
510
7.52M
          for (size_t ix = 0; ix < 8; ix++) {
511
6.69M
            if (ix == 0 && iy == 0) continue;
512
6.48M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
6.48M
          }
514
836k
        }
515
209k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
209k
                                  scratch_space);
517
209k
      }
518
104k
      break;
519
0
    }
520
55.9k
    case Type::DCT4X8: {
521
55.9k
      float dcs[2] = {};
522
55.9k
      float block0 = coefficients[0];
523
55.9k
      float block1 = coefficients[8];
524
55.9k
      dcs[0] = block0 + block1;
525
55.9k
      dcs[1] = block0 - block1;
526
167k
      for (size_t y = 0; y < 2; y++) {
527
111k
        HWY_ALIGN float block[4 * 8];
528
111k
        block[0] = dcs[y];
529
559k
        for (size_t iy = 0; iy < 4; iy++) {
530
4.02M
          for (size_t ix = 0; ix < 8; ix++) {
531
3.58M
            if (ix == 0 && iy == 0) continue;
532
3.47M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
3.47M
          }
534
447k
        }
535
111k
        ComputeScaledIDCT<4, 8>()(
536
111k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
111k
            scratch_space);
538
111k
      }
539
55.9k
      break;
540
0
    }
541
1.23k
    case Type::DCT4X4: {
542
1.23k
      float dcs[4] = {};
543
1.23k
      float block00 = coefficients[0];
544
1.23k
      float block01 = coefficients[1];
545
1.23k
      float block10 = coefficients[8];
546
1.23k
      float block11 = coefficients[9];
547
1.23k
      dcs[0] = block00 + block01 + block10 + block11;
548
1.23k
      dcs[1] = block00 + block01 - block10 - block11;
549
1.23k
      dcs[2] = block00 - block01 + block10 - block11;
550
1.23k
      dcs[3] = block00 - block01 - block10 + block11;
551
3.69k
      for (size_t y = 0; y < 2; y++) {
552
7.38k
        for (size_t x = 0; x < 2; x++) {
553
4.92k
          HWY_ALIGN float block[4 * 4];
554
4.92k
          block[0] = dcs[y * 2 + x];
555
24.6k
          for (size_t iy = 0; iy < 4; iy++) {
556
98.4k
            for (size_t ix = 0; ix < 4; ix++) {
557
78.7k
              if (ix == 0 && iy == 0) continue;
558
73.8k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
73.8k
            }
560
19.6k
          }
561
4.92k
          ComputeScaledIDCT<4, 4>()(
562
4.92k
              block,
563
4.92k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
4.92k
              scratch_space);
565
4.92k
        }
566
2.46k
      }
567
1.23k
      break;
568
0
    }
569
410k
    case Type::DCT2X2: {
570
410k
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
410k
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
410k
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
410k
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
410k
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
3.69M
      for (size_t y = 0; y < kBlockDim; y++) {
576
29.5M
        for (size_t x = 0; x < kBlockDim; x++) {
577
26.2M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
26.2M
        }
579
3.28M
      }
580
410k
      break;
581
0
    }
582
50.5k
    case Type::DCT16X16: {
583
50.5k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
50.5k
                                  scratch_space);
585
50.5k
      break;
586
0
    }
587
58.8k
    case Type::DCT16X8: {
588
58.8k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
58.8k
                                 scratch_space);
590
58.8k
      break;
591
0
    }
592
74.7k
    case Type::DCT8X16: {
593
74.7k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
74.7k
                                 scratch_space);
595
74.7k
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
19.3k
    case Type::DCT32X16: {
608
19.3k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
19.3k
                                  scratch_space);
610
19.3k
      break;
611
0
    }
612
23.0k
    case Type::DCT16X32: {
613
23.0k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
23.0k
                                  scratch_space);
615
23.0k
      break;
616
0
    }
617
46.8k
    case Type::DCT32X32: {
618
46.8k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
46.8k
                                  scratch_space);
620
46.8k
      break;
621
0
    }
622
701k
    case Type::DCT: {
623
701k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
701k
                                scratch_space);
625
701k
      break;
626
0
    }
627
37.5k
    case Type::AFV0: {
628
37.5k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
37.5k
      break;
630
0
    }
631
24.2k
    case Type::AFV1: {
632
24.2k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
24.2k
      break;
634
0
    }
635
30.4k
    case Type::AFV2: {
636
30.4k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
30.4k
      break;
638
0
    }
639
44.7k
    case Type::AFV3: {
640
44.7k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
44.7k
      break;
642
0
    }
643
594
    case Type::DCT64X32: {
644
594
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
594
                                  scratch_space);
646
594
      break;
647
0
    }
648
288
    case Type::DCT32X64: {
649
288
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
288
                                  scratch_space);
651
288
      break;
652
0
    }
653
3.80k
    case Type::DCT64X64: {
654
3.80k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
3.80k
                                  scratch_space);
656
3.80k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
2.27M
  }
689
2.27M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
2.38M
                                              float* JXL_RESTRICT scratch) {
695
2.38M
  using Type = AcStrategyType;
696
2.38M
  HWY_ALIGN float warm_block[4 * 4];
697
2.38M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
2.38M
  switch (strategy) {
699
58.8k
    case Type::DCT16X8: {
700
58.8k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
58.8k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
58.8k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
58.8k
      break;
704
0
    }
705
74.7k
    case Type::DCT8X16: {
706
74.7k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
74.7k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
74.7k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
74.7k
      break;
710
0
    }
711
50.5k
    case Type::DCT16X16: {
712
50.5k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
50.5k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
50.5k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
50.5k
      break;
716
0
    }
717
0
    case Type::DCT32X8: {
718
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
0
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
19.3k
    case Type::DCT32X16: {
730
19.3k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
19.3k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
19.3k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
19.3k
      break;
734
0
    }
735
23.0k
    case Type::DCT16X32: {
736
23.0k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
23.0k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
23.0k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
23.0k
      break;
740
0
    }
741
46.8k
    case Type::DCT32X32: {
742
46.8k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
46.8k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
46.8k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
46.8k
      break;
746
0
    }
747
594
    case Type::DCT64X32: {
748
594
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
594
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
594
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
594
      break;
752
0
    }
753
288
    case Type::DCT32X64: {
754
288
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
288
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
288
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
288
      break;
758
0
    }
759
3.80k
    case Type::DCT64X64: {
760
3.80k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
3.80k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
3.80k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
3.80k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
712k
    case Type::DCT:
806
1.12M
    case Type::DCT2X2:
807
1.12M
    case Type::DCT4X4:
808
1.18M
    case Type::DCT4X8:
809
1.28M
    case Type::DCT8X4:
810
1.32M
    case Type::AFV0:
811
1.34M
    case Type::AFV1:
812
1.37M
    case Type::AFV2:
813
1.42M
    case Type::AFV3:
814
2.11M
    case Type::IDENTITY:
815
2.11M
      llf[0] = dc[0];
816
2.11M
      break;
817
2.38M
  };
818
2.38M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
2.38M
                                              float* JXL_RESTRICT scratch) {
695
2.38M
  using Type = AcStrategyType;
696
2.38M
  HWY_ALIGN float warm_block[4 * 4];
697
2.38M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
2.38M
  switch (strategy) {
699
58.8k
    case Type::DCT16X8: {
700
58.8k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
58.8k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
58.8k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
58.8k
      break;
704
0
    }
705
74.7k
    case Type::DCT8X16: {
706
74.7k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
74.7k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
74.7k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
74.7k
      break;
710
0
    }
711
50.5k
    case Type::DCT16X16: {
712
50.5k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
50.5k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
50.5k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
50.5k
      break;
716
0
    }
717
0
    case Type::DCT32X8: {
718
0
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
0
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
0
      break;
722
0
    }
723
0
    case Type::DCT8X32: {
724
0
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
0
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
0
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
0
      break;
728
0
    }
729
19.3k
    case Type::DCT32X16: {
730
19.3k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
19.3k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
19.3k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
19.3k
      break;
734
0
    }
735
23.0k
    case Type::DCT16X32: {
736
23.0k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
23.0k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
23.0k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
23.0k
      break;
740
0
    }
741
46.8k
    case Type::DCT32X32: {
742
46.8k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
46.8k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
46.8k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
46.8k
      break;
746
0
    }
747
594
    case Type::DCT64X32: {
748
594
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
594
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
594
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
594
      break;
752
0
    }
753
288
    case Type::DCT32X64: {
754
288
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
288
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
288
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
288
      break;
758
0
    }
759
3.80k
    case Type::DCT64X64: {
760
3.80k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
3.80k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
3.80k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
3.80k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
712k
    case Type::DCT:
806
1.12M
    case Type::DCT2X2:
807
1.12M
    case Type::DCT4X4:
808
1.18M
    case Type::DCT4X8:
809
1.28M
    case Type::DCT8X4:
810
1.32M
    case Type::AFV0:
811
1.34M
    case Type::AFV1:
812
1.37M
    case Type::AFV2:
813
1.42M
    case Type::AFV3:
814
2.11M
    case Type::IDENTITY:
815
2.11M
      llf[0] = dc[0];
816
2.11M
      break;
817
2.38M
  };
818
2.38M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_