Coverage Report

Created: 2025-06-16 07:00

/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized corner of `output`, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block and rescaling every resulting coefficient.
//
// `input` and `input_stride` are wrapped by DCTFrom and handed to
// ComputeScaledDCT, which writes its result to `block` and receives
// `scratch_space` as a third argument. The required sizes of `block` and
// `scratch_space` are not visible in this function (TODO confirm against
// dct-inl.h). `output` is written with row stride `output_stride`.
//
// LF_ROWS must equal ROWS and LF_COLS must equal COLS; both are enforced by
// the static_asserts at the top of the body.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
300k
                                  float* JXL_RESTRICT scratch_space) {
41
300k
  // Compile-time contract: the low-frequency region has exactly the size of
  // the DCT that is computed here.
  static_assert(LF_ROWS == ROWS,
42
300k
                "ReinterpretingDCT should only be called with LF == N");
43
300k
  static_assert(LF_COLS == COLS,
44
300k
                "ReinterpretingDCT should only be called with LF == N");
45
300k
  // ROWS x COLS scaled DCT of the input; coefficients land in `block`.
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
300k
                                 scratch_space);
47
300k
  // Fewer rows than columns: `block` is read with row stride COLS, y walks
  // LF_ROWS and x walks LF_COLS.
  if (ROWS < COLS) {
48
232k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
473k
      for (size_t x = 0; x < LF_COLS; x++) {
50
346k
        // Coefficient times the ROWS->DCT_ROWS scale for index y and the
        // COLS->DCT_COLS scale for index x.
        output[y * output_stride + x] =
51
346k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
346k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
346k
      }
54
127k
    }
55
195k
  // ROWS >= COLS (square sizes included): the roles of the two dimensions are
  // swapped. `block` is read with row stride ROWS, y walks LF_COLS and x walks
  // LF_ROWS. NOTE(review): presumably ComputeScaledDCT stores its result
  // transposed in this case -- confirm against dct-inl.h.
  } else {
56
603k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.77M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.36M
        // Same rescaling as above with the scale factors exchanged: the COLS
        // scale is indexed by y, the ROWS scale by x.
        output[y * output_stride + x] =
59
1.36M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.36M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.36M
      }
62
408k
    }
63
195k
  }
64
300k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
79.2k
                                  float* JXL_RESTRICT scratch_space) {
41
79.2k
  static_assert(LF_ROWS == ROWS,
42
79.2k
                "ReinterpretingDCT should only be called with LF == N");
43
79.2k
  static_assert(LF_COLS == COLS,
44
79.2k
                "ReinterpretingDCT should only be called with LF == N");
45
79.2k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
79.2k
                                 scratch_space);
47
79.2k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
79.2k
  } else {
56
158k
    for (size_t y = 0; y < LF_COLS; y++) {
57
237k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
158k
        output[y * output_stride + x] =
59
158k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
158k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
158k
      }
62
79.2k
    }
63
79.2k
  }
64
79.2k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
83.9k
                                  float* JXL_RESTRICT scratch_space) {
41
83.9k
  static_assert(LF_ROWS == ROWS,
42
83.9k
                "ReinterpretingDCT should only be called with LF == N");
43
83.9k
  static_assert(LF_COLS == COLS,
44
83.9k
                "ReinterpretingDCT should only be called with LF == N");
45
83.9k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
83.9k
                                 scratch_space);
47
83.9k
  if (ROWS < COLS) {
48
167k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
251k
      for (size_t x = 0; x < LF_COLS; x++) {
50
167k
        output[y * output_stride + x] =
51
167k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
167k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
167k
      }
54
83.9k
    }
55
83.9k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
83.9k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
58.0k
                                  float* JXL_RESTRICT scratch_space) {
41
58.0k
  static_assert(LF_ROWS == ROWS,
42
58.0k
                "ReinterpretingDCT should only be called with LF == N");
43
58.0k
  static_assert(LF_COLS == COLS,
44
58.0k
                "ReinterpretingDCT should only be called with LF == N");
45
58.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
58.0k
                                 scratch_space);
47
58.0k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
58.0k
  } else {
56
174k
    for (size_t y = 0; y < LF_COLS; y++) {
57
348k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
232k
        output[y * output_stride + x] =
59
232k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
232k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
232k
      }
62
116k
    }
63
58.0k
  }
64
58.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6
                                  float* JXL_RESTRICT scratch_space) {
41
6
  static_assert(LF_ROWS == ROWS,
42
6
                "ReinterpretingDCT should only be called with LF == N");
43
6
  static_assert(LF_COLS == COLS,
44
6
                "ReinterpretingDCT should only be called with LF == N");
45
6
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6
                                 scratch_space);
47
6
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
6
  } else {
56
12
    for (size_t y = 0; y < LF_COLS; y++) {
57
30
      for (size_t x = 0; x < LF_ROWS; x++) {
58
24
        output[y * output_stride + x] =
59
24
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
24
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
24
      }
62
6
    }
63
6
  }
64
6
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
27
                                  float* JXL_RESTRICT scratch_space) {
41
27
  static_assert(LF_ROWS == ROWS,
42
27
                "ReinterpretingDCT should only be called with LF == N");
43
27
  static_assert(LF_COLS == COLS,
44
27
                "ReinterpretingDCT should only be called with LF == N");
45
27
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
27
                                 scratch_space);
47
27
  if (ROWS < COLS) {
48
54
    for (size_t y = 0; y < LF_ROWS; y++) {
49
135
      for (size_t x = 0; x < LF_COLS; x++) {
50
108
        output[y * output_stride + x] =
51
108
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
108
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
108
      }
54
27
    }
55
27
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
27
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
16.7k
                                  float* JXL_RESTRICT scratch_space) {
41
16.7k
  static_assert(LF_ROWS == ROWS,
42
16.7k
                "ReinterpretingDCT should only be called with LF == N");
43
16.7k
  static_assert(LF_COLS == COLS,
44
16.7k
                "ReinterpretingDCT should only be called with LF == N");
45
16.7k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
16.7k
                                 scratch_space);
47
16.7k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
16.7k
  } else {
56
50.1k
    for (size_t y = 0; y < LF_COLS; y++) {
57
167k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
133k
        output[y * output_stride + x] =
59
133k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
133k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
133k
      }
62
33.4k
    }
63
16.7k
  }
64
16.7k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
20.8k
                                  float* JXL_RESTRICT scratch_space) {
41
20.8k
  static_assert(LF_ROWS == ROWS,
42
20.8k
                "ReinterpretingDCT should only be called with LF == N");
43
20.8k
  static_assert(LF_COLS == COLS,
44
20.8k
                "ReinterpretingDCT should only be called with LF == N");
45
20.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
20.8k
                                 scratch_space);
47
20.8k
  if (ROWS < COLS) {
48
62.4k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
208k
      for (size_t x = 0; x < LF_COLS; x++) {
50
166k
        output[y * output_stride + x] =
51
166k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
166k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
166k
      }
54
41.6k
    }
55
20.8k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
20.8k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
37.0k
                                  float* JXL_RESTRICT scratch_space) {
41
37.0k
  static_assert(LF_ROWS == ROWS,
42
37.0k
                "ReinterpretingDCT should only be called with LF == N");
43
37.0k
  static_assert(LF_COLS == COLS,
44
37.0k
                "ReinterpretingDCT should only be called with LF == N");
45
37.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
37.0k
                                 scratch_space);
47
37.0k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
37.0k
  } else {
56
185k
    for (size_t y = 0; y < LF_COLS; y++) {
57
740k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
592k
        output[y * output_stride + x] =
59
592k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
592k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
592k
      }
62
148k
    }
63
37.0k
  }
64
37.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
489
                                  float* JXL_RESTRICT scratch_space) {
41
489
  static_assert(LF_ROWS == ROWS,
42
489
                "ReinterpretingDCT should only be called with LF == N");
43
489
  static_assert(LF_COLS == COLS,
44
489
                "ReinterpretingDCT should only be called with LF == N");
45
489
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
489
                                 scratch_space);
47
489
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
489
  } else {
56
2.44k
    for (size_t y = 0; y < LF_COLS; y++) {
57
17.6k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
15.6k
        output[y * output_stride + x] =
59
15.6k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
15.6k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
15.6k
      }
62
1.95k
    }
63
489
  }
64
489
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
375
                                  float* JXL_RESTRICT scratch_space) {
41
375
  static_assert(LF_ROWS == ROWS,
42
375
                "ReinterpretingDCT should only be called with LF == N");
43
375
  static_assert(LF_COLS == COLS,
44
375
                "ReinterpretingDCT should only be called with LF == N");
45
375
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
375
                                 scratch_space);
47
375
  if (ROWS < COLS) {
48
1.87k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
13.5k
      for (size_t x = 0; x < LF_COLS; x++) {
50
12.0k
        output[y * output_stride + x] =
51
12.0k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
12.0k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
12.0k
      }
54
1.50k
    }
55
375
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
375
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3.69k
                                  float* JXL_RESTRICT scratch_space) {
41
3.69k
  static_assert(LF_ROWS == ROWS,
42
3.69k
                "ReinterpretingDCT should only be called with LF == N");
43
3.69k
  static_assert(LF_COLS == COLS,
44
3.69k
                "ReinterpretingDCT should only be called with LF == N");
45
3.69k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3.69k
                                 scratch_space);
47
3.69k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3.69k
  } else {
56
33.2k
    for (size_t y = 0; y < LF_COLS; y++) {
57
266k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
236k
        output[y * output_stride + x] =
59
236k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
236k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
236k
      }
62
29.5k
    }
63
3.69k
  }
64
3.69k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
5.48M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.48M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.48M
  static_assert(S % 2 == 0, "S should be even");
70
5.48M
  float temp[kDCTBlockSize];
71
5.48M
  constexpr size_t num_2x2 = S / 2;
72
18.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
51.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
38.3M
      float c00 = block[y * kBlockDim + x];
75
38.3M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
38.3M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
38.3M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
38.3M
      float r00 = c00 + c01 + c10 + c11;
79
38.3M
      float r01 = c00 + c01 - c10 - c11;
80
38.3M
      float r10 = c00 - c01 + c10 - c11;
81
38.3M
      float r11 = c00 - c01 - c10 + c11;
82
38.3M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
38.3M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
38.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
38.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
38.3M
    }
87
12.7M
  }
88
31.0M
  for (size_t y = 0; y < S; y++) {
89
179M
    for (size_t x = 0; x < S; x++) {
90
153M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
153M
    }
92
25.5M
  }
93
5.48M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
2.38M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.38M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.19M
      float c00 = block[y * kBlockDim + x];
75
1.19M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
1.19M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
1.19M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
1.19M
      float r00 = c00 + c01 + c10 + c11;
79
1.19M
      float r01 = c00 + c01 - c10 - c11;
80
1.19M
      float r10 = c00 - c01 + c10 - c11;
81
1.19M
      float r11 = c00 - c01 - c10 + c11;
82
1.19M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
1.19M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
1.19M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
1.19M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
1.19M
    }
87
1.19M
  }
88
3.58M
  for (size_t y = 0; y < S; y++) {
89
7.16M
    for (size_t x = 0; x < S; x++) {
90
4.77M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
4.77M
    }
92
2.38M
  }
93
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
3.58M
  for (size_t y = 0; y < num_2x2; y++) {
73
7.16M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.77M
      float c00 = block[y * kBlockDim + x];
75
4.77M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
4.77M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
4.77M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
4.77M
      float r00 = c00 + c01 + c10 + c11;
79
4.77M
      float r01 = c00 + c01 - c10 - c11;
80
4.77M
      float r10 = c00 - c01 + c10 - c11;
81
4.77M
      float r11 = c00 - c01 - c10 + c11;
82
4.77M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
4.77M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
4.77M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
4.77M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
4.77M
    }
87
2.38M
  }
88
5.97M
  for (size_t y = 0; y < S; y++) {
89
23.8M
    for (size_t x = 0; x < S; x++) {
90
19.1M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
19.1M
    }
92
4.77M
  }
93
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.19M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
1.19M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.19M
  static_assert(S % 2 == 0, "S should be even");
70
1.19M
  float temp[kDCTBlockSize];
71
1.19M
  constexpr size_t num_2x2 = S / 2;
72
5.97M
  for (size_t y = 0; y < num_2x2; y++) {
73
23.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
19.1M
      float c00 = block[y * kBlockDim + x];
75
19.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
19.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
19.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
19.1M
      float r00 = c00 + c01 + c10 + c11;
79
19.1M
      float r01 = c00 + c01 - c10 - c11;
80
19.1M
      float r10 = c00 - c01 + c10 - c11;
81
19.1M
      float r11 = c00 - c01 - c10 + c11;
82
19.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
19.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
19.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
19.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
19.1M
    }
87
4.77M
  }
88
10.7M
  for (size_t y = 0; y < S; y++) {
89
85.9M
    for (size_t x = 0; x < S; x++) {
90
76.4M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
76.4M
    }
92
9.55M
  }
93
1.19M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
633k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
633k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
633k
  static_assert(S % 2 == 0, "S should be even");
70
633k
  float temp[kDCTBlockSize];
71
633k
  constexpr size_t num_2x2 = S / 2;
72
1.26M
  for (size_t y = 0; y < num_2x2; y++) {
73
1.26M
    for (size_t x = 0; x < num_2x2; x++) {
74
633k
      float c00 = block[y * kBlockDim + x];
75
633k
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
633k
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
633k
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
633k
      float r00 = c00 + c01 + c10 + c11;
79
633k
      float r01 = c00 + c01 - c10 - c11;
80
633k
      float r10 = c00 - c01 + c10 - c11;
81
633k
      float r11 = c00 - c01 - c10 + c11;
82
633k
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
633k
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
633k
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
633k
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
633k
    }
87
633k
  }
88
1.89M
  for (size_t y = 0; y < S; y++) {
89
3.79M
    for (size_t x = 0; x < S; x++) {
90
2.53M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
2.53M
    }
92
1.26M
  }
93
633k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
633k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
633k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
633k
  static_assert(S % 2 == 0, "S should be even");
70
633k
  float temp[kDCTBlockSize];
71
633k
  constexpr size_t num_2x2 = S / 2;
72
1.89M
  for (size_t y = 0; y < num_2x2; y++) {
73
3.79M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.53M
      float c00 = block[y * kBlockDim + x];
75
2.53M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
2.53M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
2.53M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
2.53M
      float r00 = c00 + c01 + c10 + c11;
79
2.53M
      float r01 = c00 + c01 - c10 - c11;
80
2.53M
      float r10 = c00 - c01 + c10 - c11;
81
2.53M
      float r11 = c00 - c01 - c10 + c11;
82
2.53M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
2.53M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
2.53M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
2.53M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
2.53M
    }
87
1.26M
  }
88
3.16M
  for (size_t y = 0; y < S; y++) {
89
12.6M
    for (size_t x = 0; x < S; x++) {
90
10.1M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
10.1M
    }
92
2.53M
  }
93
633k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
633k
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
633k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
633k
  static_assert(S % 2 == 0, "S should be even");
70
633k
  float temp[kDCTBlockSize];
71
633k
  constexpr size_t num_2x2 = S / 2;
72
3.16M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
10.1M
      float c00 = block[y * kBlockDim + x];
75
10.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
10.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
10.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
10.1M
      float r00 = c00 + c01 + c10 + c11;
79
10.1M
      float r01 = c00 + c01 - c10 - c11;
80
10.1M
      float r10 = c00 - c01 + c10 - c11;
81
10.1M
      float r11 = c00 - c01 - c10 + c11;
82
10.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
10.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
10.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
10.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
10.1M
    }
87
2.53M
  }
88
5.69M
  for (size_t y = 0; y < S; y++) {
89
45.5M
    for (size_t x = 0; x < S; x++) {
90
40.5M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
40.5M
    }
92
5.06M
  }
93
633k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
4.99M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
4.99M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
4.99M
      {
98
4.99M
          0.25,
99
4.99M
          0.25,
100
4.99M
          0.25,
101
4.99M
          0.25,
102
4.99M
          0.25,
103
4.99M
          0.25,
104
4.99M
          0.25,
105
4.99M
          0.25,
106
4.99M
          0.25,
107
4.99M
          0.25,
108
4.99M
          0.25,
109
4.99M
          0.25,
110
4.99M
          0.25,
111
4.99M
          0.25,
112
4.99M
          0.25,
113
4.99M
          0.25,
114
4.99M
      },
115
4.99M
      {
116
4.99M
          0.876902929799142f,
117
4.99M
          0.2206518106944235f,
118
4.99M
          -0.10140050393753763f,
119
4.99M
          -0.1014005039375375f,
120
4.99M
          0.2206518106944236f,
121
4.99M
          -0.10140050393753777f,
122
4.99M
          -0.10140050393753772f,
123
4.99M
          -0.10140050393753763f,
124
4.99M
          -0.10140050393753758f,
125
4.99M
          -0.10140050393753769f,
126
4.99M
          -0.1014005039375375f,
127
4.99M
          -0.10140050393753768f,
128
4.99M
          -0.10140050393753768f,
129
4.99M
          -0.10140050393753759f,
130
4.99M
          -0.10140050393753763f,
131
4.99M
          -0.10140050393753741f,
132
4.99M
      },
133
4.99M
      {
134
4.99M
          0.0,
135
4.99M
          0.0,
136
4.99M
          0.40670075830260755f,
137
4.99M
          0.44444816619734445f,
138
4.99M
          0.0,
139
4.99M
          0.0,
140
4.99M
          0.19574399372042936f,
141
4.99M
          0.2929100136981264f,
142
4.99M
          -0.40670075830260716f,
143
4.99M
          -0.19574399372042872f,
144
4.99M
          0.0,
145
4.99M
          0.11379074460448091f,
146
4.99M
          -0.44444816619734384f,
147
4.99M
          -0.29291001369812636f,
148
4.99M
          -0.1137907446044814f,
149
4.99M
          0.0,
150
4.99M
      },
151
4.99M
      {
152
4.99M
          0.0,
153
4.99M
          0.0,
154
4.99M
          -0.21255748058288748f,
155
4.99M
          0.3085497062849767f,
156
4.99M
          0.0,
157
4.99M
          0.4706702258572536f,
158
4.99M
          -0.1621205195722993f,
159
4.99M
          0.0,
160
4.99M
          -0.21255748058287047f,
161
4.99M
          -0.16212051957228327f,
162
4.99M
          -0.47067022585725277f,
163
4.99M
          -0.1464291867126764f,
164
4.99M
          0.3085497062849487f,
165
4.99M
          0.0,
166
4.99M
          -0.14642918671266536f,
167
4.99M
          0.4251149611657548f,
168
4.99M
      },
169
4.99M
      {
170
4.99M
          0.0,
171
4.99M
          -0.7071067811865474f,
172
4.99M
          0.0,
173
4.99M
          0.0,
174
4.99M
          0.7071067811865476f,
175
4.99M
          0.0,
176
4.99M
          0.0,
177
4.99M
          0.0,
178
4.99M
          0.0,
179
4.99M
          0.0,
180
4.99M
          0.0,
181
4.99M
          0.0,
182
4.99M
          0.0,
183
4.99M
          0.0,
184
4.99M
          0.0,
185
4.99M
          0.0,
186
4.99M
      },
187
4.99M
      {
188
4.99M
          -0.4105377591765233f,
189
4.99M
          0.6235485373547691f,
190
4.99M
          -0.06435071657946274f,
191
4.99M
          -0.06435071657946266f,
192
4.99M
          0.6235485373547694f,
193
4.99M
          -0.06435071657946284f,
194
4.99M
          -0.0643507165794628f,
195
4.99M
          -0.06435071657946274f,
196
4.99M
          -0.06435071657946272f,
197
4.99M
          -0.06435071657946279f,
198
4.99M
          -0.06435071657946266f,
199
4.99M
          -0.06435071657946277f,
200
4.99M
          -0.06435071657946277f,
201
4.99M
          -0.06435071657946273f,
202
4.99M
          -0.06435071657946274f,
203
4.99M
          -0.0643507165794626f,
204
4.99M
      },
205
4.99M
      {
206
4.99M
          0.0,
207
4.99M
          0.0,
208
4.99M
          -0.4517556589999482f,
209
4.99M
          0.15854503551840063f,
210
4.99M
          0.0,
211
4.99M
          -0.04038515160822202f,
212
4.99M
          0.0074182263792423875f,
213
4.99M
          0.39351034269210167f,
214
4.99M
          -0.45175565899994635f,
215
4.99M
          0.007418226379244351f,
216
4.99M
          0.1107416575309343f,
217
4.99M
          0.08298163094882051f,
218
4.99M
          0.15854503551839705f,
219
4.99M
          0.3935103426921022f,
220
4.99M
          0.0829816309488214f,
221
4.99M
          -0.45175565899994796f,
222
4.99M
      },
223
4.99M
      {
224
4.99M
          0.0,
225
4.99M
          0.0,
226
4.99M
          -0.304684750724869f,
227
4.99M
          0.5112616136591823f,
228
4.99M
          0.0,
229
4.99M
          0.0,
230
4.99M
          -0.290480129728998f,
231
4.99M
          -0.06578701549142804f,
232
4.99M
          0.304684750724884f,
233
4.99M
          0.2904801297290076f,
234
4.99M
          0.0,
235
4.99M
          -0.23889773523344604f,
236
4.99M
          -0.5112616136592012f,
237
4.99M
          0.06578701549142545f,
238
4.99M
          0.23889773523345467f,
239
4.99M
          0.0,
240
4.99M
      },
241
4.99M
      {
242
4.99M
          0.0,
243
4.99M
          0.0,
244
4.99M
          0.3017929516615495f,
245
4.99M
          0.25792362796341184f,
246
4.99M
          0.0,
247
4.99M
          0.16272340142866204f,
248
4.99M
          0.09520022653475037f,
249
4.99M
          0.0,
250
4.99M
          0.3017929516615503f,
251
4.99M
          0.09520022653475055f,
252
4.99M
          -0.16272340142866173f,
253
4.99M
          -0.35312385449816297f,
254
4.99M
          0.25792362796341295f,
255
4.99M
          0.0,
256
4.99M
          -0.3531238544981624f,
257
4.99M
          -0.6035859033230976f,
258
4.99M
      },
259
4.99M
      {
260
4.99M
          0.0,
261
4.99M
          0.0,
262
4.99M
          0.40824829046386274f,
263
4.99M
          0.0,
264
4.99M
          0.0,
265
4.99M
          0.0,
266
4.99M
          0.0,
267
4.99M
          -0.4082482904638628f,
268
4.99M
          -0.4082482904638635f,
269
4.99M
          0.0,
270
4.99M
          0.0,
271
4.99M
          -0.40824829046386296f,
272
4.99M
          0.0,
273
4.99M
          0.4082482904638634f,
274
4.99M
          0.408248290463863f,
275
4.99M
          0.0,
276
4.99M
      },
277
4.99M
      {
278
4.99M
          0.0,
279
4.99M
          0.0,
280
4.99M
          0.1747866975480809f,
281
4.99M
          0.0812611176717539f,
282
4.99M
          0.0,
283
4.99M
          0.0,
284
4.99M
          -0.3675398009862027f,
285
4.99M
          -0.307882213957909f,
286
4.99M
          -0.17478669754808135f,
287
4.99M
          0.3675398009862011f,
288
4.99M
          0.0,
289
4.99M
          0.4826689115059883f,
290
4.99M
          -0.08126111767175039f,
291
4.99M
          0.30788221395790305f,
292
4.99M
          -0.48266891150598584f,
293
4.99M
          0.0,
294
4.99M
      },
295
4.99M
      {
296
4.99M
          0.0,
297
4.99M
          0.0,
298
4.99M
          -0.21105601049335784f,
299
4.99M
          0.18567180916109802f,
300
4.99M
          0.0,
301
4.99M
          0.0,
302
4.99M
          0.49215859013738733f,
303
4.99M
          -0.38525013709251915f,
304
4.99M
          0.21105601049335806f,
305
4.99M
          -0.49215859013738905f,
306
4.99M
          0.0,
307
4.99M
          0.17419412659916217f,
308
4.99M
          -0.18567180916109904f,
309
4.99M
          0.3852501370925211f,
310
4.99M
          -0.1741941265991621f,
311
4.99M
          0.0,
312
4.99M
      },
313
4.99M
      {
314
4.99M
          0.0,
315
4.99M
          0.0,
316
4.99M
          -0.14266084808807264f,
317
4.99M
          -0.3416446842253372f,
318
4.99M
          0.0,
319
4.99M
          0.7367497537172237f,
320
4.99M
          0.24627107722075148f,
321
4.99M
          -0.08574019035519306f,
322
4.99M
          -0.14266084808807344f,
323
4.99M
          0.24627107722075137f,
324
4.99M
          0.14883399227113567f,
325
4.99M
          -0.04768680350229251f,
326
4.99M
          -0.3416446842253373f,
327
4.99M
          -0.08574019035519267f,
328
4.99M
          -0.047686803502292804f,
329
4.99M
          -0.14266084808807242f,
330
4.99M
      },
331
4.99M
      {
332
4.99M
          0.0,
333
4.99M
          0.0,
334
4.99M
          -0.13813540350758585f,
335
4.99M
          0.3302282550303788f,
336
4.99M
          0.0,
337
4.99M
          0.08755115000587084f,
338
4.99M
          -0.07946706605909573f,
339
4.99M
          -0.4613374887461511f,
340
4.99M
          -0.13813540350758294f,
341
4.99M
          -0.07946706605910261f,
342
4.99M
          0.49724647109535086f,
343
4.99M
          0.12538059448563663f,
344
4.99M
          0.3302282550303805f,
345
4.99M
          -0.4613374887461554f,
346
4.99M
          0.12538059448564315f,
347
4.99M
          -0.13813540350758452f,
348
4.99M
      },
349
4.99M
      {
350
4.99M
          0.0,
351
4.99M
          0.0,
352
4.99M
          -0.17437602599651067f,
353
4.99M
          0.0702790691196284f,
354
4.99M
          0.0,
355
4.99M
          -0.2921026642334881f,
356
4.99M
          0.3623817333531167f,
357
4.99M
          0.0,
358
4.99M
          -0.1743760259965108f,
359
4.99M
          0.36238173335311646f,
360
4.99M
          0.29210266423348785f,
361
4.99M
          -0.4326608024727445f,
362
4.99M
          0.07027906911962818f,
363
4.99M
          0.0,
364
4.99M
          -0.4326608024727457f,
365
4.99M
          0.34875205199302267f,
366
4.99M
      },
367
4.99M
      {
368
4.99M
          0.0,
369
4.99M
          0.0,
370
4.99M
          0.11354987314994337f,
371
4.99M
          -0.07417504595810355f,
372
4.99M
          0.0,
373
4.99M
          0.19402893032594343f,
374
4.99M
          -0.435190496523228f,
375
4.99M
          0.21918684838857466f,
376
4.99M
          0.11354987314994257f,
377
4.99M
          -0.4351904965232251f,
378
4.99M
          0.5550443808910661f,
379
4.99M
          -0.25468277124066463f,
380
4.99M
          -0.07417504595810233f,
381
4.99M
          0.2191868483885728f,
382
4.99M
          -0.25468277124066413f,
383
4.99M
          0.1135498731499429f,
384
4.99M
      },
385
4.99M
  };
386
387
4.99M
  const HWY_CAPPED(float, 16) d;
388
14.9M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
9.99M
    auto pixel = Zero(d);
390
169M
    for (size_t j = 0; j < 16; j++) {
391
159M
      auto cf = Set(d, coeffs[j]);
392
159M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
159M
      pixel = MulAdd(cf, basis, pixel);
394
159M
    }
395
9.99M
    Store(pixel, d, pixels + i);
396
9.99M
  }
397
4.99M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
4.77M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
4.77M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
4.77M
      {
98
4.77M
          0.25,
99
4.77M
          0.25,
100
4.77M
          0.25,
101
4.77M
          0.25,
102
4.77M
          0.25,
103
4.77M
          0.25,
104
4.77M
          0.25,
105
4.77M
          0.25,
106
4.77M
          0.25,
107
4.77M
          0.25,
108
4.77M
          0.25,
109
4.77M
          0.25,
110
4.77M
          0.25,
111
4.77M
          0.25,
112
4.77M
          0.25,
113
4.77M
          0.25,
114
4.77M
      },
115
4.77M
      {
116
4.77M
          0.876902929799142f,
117
4.77M
          0.2206518106944235f,
118
4.77M
          -0.10140050393753763f,
119
4.77M
          -0.1014005039375375f,
120
4.77M
          0.2206518106944236f,
121
4.77M
          -0.10140050393753777f,
122
4.77M
          -0.10140050393753772f,
123
4.77M
          -0.10140050393753763f,
124
4.77M
          -0.10140050393753758f,
125
4.77M
          -0.10140050393753769f,
126
4.77M
          -0.1014005039375375f,
127
4.77M
          -0.10140050393753768f,
128
4.77M
          -0.10140050393753768f,
129
4.77M
          -0.10140050393753759f,
130
4.77M
          -0.10140050393753763f,
131
4.77M
          -0.10140050393753741f,
132
4.77M
      },
133
4.77M
      {
134
4.77M
          0.0,
135
4.77M
          0.0,
136
4.77M
          0.40670075830260755f,
137
4.77M
          0.44444816619734445f,
138
4.77M
          0.0,
139
4.77M
          0.0,
140
4.77M
          0.19574399372042936f,
141
4.77M
          0.2929100136981264f,
142
4.77M
          -0.40670075830260716f,
143
4.77M
          -0.19574399372042872f,
144
4.77M
          0.0,
145
4.77M
          0.11379074460448091f,
146
4.77M
          -0.44444816619734384f,
147
4.77M
          -0.29291001369812636f,
148
4.77M
          -0.1137907446044814f,
149
4.77M
          0.0,
150
4.77M
      },
151
4.77M
      {
152
4.77M
          0.0,
153
4.77M
          0.0,
154
4.77M
          -0.21255748058288748f,
155
4.77M
          0.3085497062849767f,
156
4.77M
          0.0,
157
4.77M
          0.4706702258572536f,
158
4.77M
          -0.1621205195722993f,
159
4.77M
          0.0,
160
4.77M
          -0.21255748058287047f,
161
4.77M
          -0.16212051957228327f,
162
4.77M
          -0.47067022585725277f,
163
4.77M
          -0.1464291867126764f,
164
4.77M
          0.3085497062849487f,
165
4.77M
          0.0,
166
4.77M
          -0.14642918671266536f,
167
4.77M
          0.4251149611657548f,
168
4.77M
      },
169
4.77M
      {
170
4.77M
          0.0,
171
4.77M
          -0.7071067811865474f,
172
4.77M
          0.0,
173
4.77M
          0.0,
174
4.77M
          0.7071067811865476f,
175
4.77M
          0.0,
176
4.77M
          0.0,
177
4.77M
          0.0,
178
4.77M
          0.0,
179
4.77M
          0.0,
180
4.77M
          0.0,
181
4.77M
          0.0,
182
4.77M
          0.0,
183
4.77M
          0.0,
184
4.77M
          0.0,
185
4.77M
          0.0,
186
4.77M
      },
187
4.77M
      {
188
4.77M
          -0.4105377591765233f,
189
4.77M
          0.6235485373547691f,
190
4.77M
          -0.06435071657946274f,
191
4.77M
          -0.06435071657946266f,
192
4.77M
          0.6235485373547694f,
193
4.77M
          -0.06435071657946284f,
194
4.77M
          -0.0643507165794628f,
195
4.77M
          -0.06435071657946274f,
196
4.77M
          -0.06435071657946272f,
197
4.77M
          -0.06435071657946279f,
198
4.77M
          -0.06435071657946266f,
199
4.77M
          -0.06435071657946277f,
200
4.77M
          -0.06435071657946277f,
201
4.77M
          -0.06435071657946273f,
202
4.77M
          -0.06435071657946274f,
203
4.77M
          -0.0643507165794626f,
204
4.77M
      },
205
4.77M
      {
206
4.77M
          0.0,
207
4.77M
          0.0,
208
4.77M
          -0.4517556589999482f,
209
4.77M
          0.15854503551840063f,
210
4.77M
          0.0,
211
4.77M
          -0.04038515160822202f,
212
4.77M
          0.0074182263792423875f,
213
4.77M
          0.39351034269210167f,
214
4.77M
          -0.45175565899994635f,
215
4.77M
          0.007418226379244351f,
216
4.77M
          0.1107416575309343f,
217
4.77M
          0.08298163094882051f,
218
4.77M
          0.15854503551839705f,
219
4.77M
          0.3935103426921022f,
220
4.77M
          0.0829816309488214f,
221
4.77M
          -0.45175565899994796f,
222
4.77M
      },
223
4.77M
      {
224
4.77M
          0.0,
225
4.77M
          0.0,
226
4.77M
          -0.304684750724869f,
227
4.77M
          0.5112616136591823f,
228
4.77M
          0.0,
229
4.77M
          0.0,
230
4.77M
          -0.290480129728998f,
231
4.77M
          -0.06578701549142804f,
232
4.77M
          0.304684750724884f,
233
4.77M
          0.2904801297290076f,
234
4.77M
          0.0,
235
4.77M
          -0.23889773523344604f,
236
4.77M
          -0.5112616136592012f,
237
4.77M
          0.06578701549142545f,
238
4.77M
          0.23889773523345467f,
239
4.77M
          0.0,
240
4.77M
      },
241
4.77M
      {
242
4.77M
          0.0,
243
4.77M
          0.0,
244
4.77M
          0.3017929516615495f,
245
4.77M
          0.25792362796341184f,
246
4.77M
          0.0,
247
4.77M
          0.16272340142866204f,
248
4.77M
          0.09520022653475037f,
249
4.77M
          0.0,
250
4.77M
          0.3017929516615503f,
251
4.77M
          0.09520022653475055f,
252
4.77M
          -0.16272340142866173f,
253
4.77M
          -0.35312385449816297f,
254
4.77M
          0.25792362796341295f,
255
4.77M
          0.0,
256
4.77M
          -0.3531238544981624f,
257
4.77M
          -0.6035859033230976f,
258
4.77M
      },
259
4.77M
      {
260
4.77M
          0.0,
261
4.77M
          0.0,
262
4.77M
          0.40824829046386274f,
263
4.77M
          0.0,
264
4.77M
          0.0,
265
4.77M
          0.0,
266
4.77M
          0.0,
267
4.77M
          -0.4082482904638628f,
268
4.77M
          -0.4082482904638635f,
269
4.77M
          0.0,
270
4.77M
          0.0,
271
4.77M
          -0.40824829046386296f,
272
4.77M
          0.0,
273
4.77M
          0.4082482904638634f,
274
4.77M
          0.408248290463863f,
275
4.77M
          0.0,
276
4.77M
      },
277
4.77M
      {
278
4.77M
          0.0,
279
4.77M
          0.0,
280
4.77M
          0.1747866975480809f,
281
4.77M
          0.0812611176717539f,
282
4.77M
          0.0,
283
4.77M
          0.0,
284
4.77M
          -0.3675398009862027f,
285
4.77M
          -0.307882213957909f,
286
4.77M
          -0.17478669754808135f,
287
4.77M
          0.3675398009862011f,
288
4.77M
          0.0,
289
4.77M
          0.4826689115059883f,
290
4.77M
          -0.08126111767175039f,
291
4.77M
          0.30788221395790305f,
292
4.77M
          -0.48266891150598584f,
293
4.77M
          0.0,
294
4.77M
      },
295
4.77M
      {
296
4.77M
          0.0,
297
4.77M
          0.0,
298
4.77M
          -0.21105601049335784f,
299
4.77M
          0.18567180916109802f,
300
4.77M
          0.0,
301
4.77M
          0.0,
302
4.77M
          0.49215859013738733f,
303
4.77M
          -0.38525013709251915f,
304
4.77M
          0.21105601049335806f,
305
4.77M
          -0.49215859013738905f,
306
4.77M
          0.0,
307
4.77M
          0.17419412659916217f,
308
4.77M
          -0.18567180916109904f,
309
4.77M
          0.3852501370925211f,
310
4.77M
          -0.1741941265991621f,
311
4.77M
          0.0,
312
4.77M
      },
313
4.77M
      {
314
4.77M
          0.0,
315
4.77M
          0.0,
316
4.77M
          -0.14266084808807264f,
317
4.77M
          -0.3416446842253372f,
318
4.77M
          0.0,
319
4.77M
          0.7367497537172237f,
320
4.77M
          0.24627107722075148f,
321
4.77M
          -0.08574019035519306f,
322
4.77M
          -0.14266084808807344f,
323
4.77M
          0.24627107722075137f,
324
4.77M
          0.14883399227113567f,
325
4.77M
          -0.04768680350229251f,
326
4.77M
          -0.3416446842253373f,
327
4.77M
          -0.08574019035519267f,
328
4.77M
          -0.047686803502292804f,
329
4.77M
          -0.14266084808807242f,
330
4.77M
      },
331
4.77M
      {
332
4.77M
          0.0,
333
4.77M
          0.0,
334
4.77M
          -0.13813540350758585f,
335
4.77M
          0.3302282550303788f,
336
4.77M
          0.0,
337
4.77M
          0.08755115000587084f,
338
4.77M
          -0.07946706605909573f,
339
4.77M
          -0.4613374887461511f,
340
4.77M
          -0.13813540350758294f,
341
4.77M
          -0.07946706605910261f,
342
4.77M
          0.49724647109535086f,
343
4.77M
          0.12538059448563663f,
344
4.77M
          0.3302282550303805f,
345
4.77M
          -0.4613374887461554f,
346
4.77M
          0.12538059448564315f,
347
4.77M
          -0.13813540350758452f,
348
4.77M
      },
349
4.77M
      {
350
4.77M
          0.0,
351
4.77M
          0.0,
352
4.77M
          -0.17437602599651067f,
353
4.77M
          0.0702790691196284f,
354
4.77M
          0.0,
355
4.77M
          -0.2921026642334881f,
356
4.77M
          0.3623817333531167f,
357
4.77M
          0.0,
358
4.77M
          -0.1743760259965108f,
359
4.77M
          0.36238173335311646f,
360
4.77M
          0.29210266423348785f,
361
4.77M
          -0.4326608024727445f,
362
4.77M
          0.07027906911962818f,
363
4.77M
          0.0,
364
4.77M
          -0.4326608024727457f,
365
4.77M
          0.34875205199302267f,
366
4.77M
      },
367
4.77M
      {
368
4.77M
          0.0,
369
4.77M
          0.0,
370
4.77M
          0.11354987314994337f,
371
4.77M
          -0.07417504595810355f,
372
4.77M
          0.0,
373
4.77M
          0.19402893032594343f,
374
4.77M
          -0.435190496523228f,
375
4.77M
          0.21918684838857466f,
376
4.77M
          0.11354987314994257f,
377
4.77M
          -0.4351904965232251f,
378
4.77M
          0.5550443808910661f,
379
4.77M
          -0.25468277124066463f,
380
4.77M
          -0.07417504595810233f,
381
4.77M
          0.2191868483885728f,
382
4.77M
          -0.25468277124066413f,
383
4.77M
          0.1135498731499429f,
384
4.77M
      },
385
4.77M
  };
386
387
4.77M
  const HWY_CAPPED(float, 16) d;
388
14.3M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
9.55M
    auto pixel = Zero(d);
390
162M
    for (size_t j = 0; j < 16; j++) {
391
152M
      auto cf = Set(d, coeffs[j]);
392
152M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
152M
      pixel = MulAdd(cf, basis, pixel);
394
152M
    }
395
9.55M
    Store(pixel, d, pixels + i);
396
9.55M
  }
397
4.77M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
219k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
219k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
219k
      {
98
219k
          0.25,
99
219k
          0.25,
100
219k
          0.25,
101
219k
          0.25,
102
219k
          0.25,
103
219k
          0.25,
104
219k
          0.25,
105
219k
          0.25,
106
219k
          0.25,
107
219k
          0.25,
108
219k
          0.25,
109
219k
          0.25,
110
219k
          0.25,
111
219k
          0.25,
112
219k
          0.25,
113
219k
          0.25,
114
219k
      },
115
219k
      {
116
219k
          0.876902929799142f,
117
219k
          0.2206518106944235f,
118
219k
          -0.10140050393753763f,
119
219k
          -0.1014005039375375f,
120
219k
          0.2206518106944236f,
121
219k
          -0.10140050393753777f,
122
219k
          -0.10140050393753772f,
123
219k
          -0.10140050393753763f,
124
219k
          -0.10140050393753758f,
125
219k
          -0.10140050393753769f,
126
219k
          -0.1014005039375375f,
127
219k
          -0.10140050393753768f,
128
219k
          -0.10140050393753768f,
129
219k
          -0.10140050393753759f,
130
219k
          -0.10140050393753763f,
131
219k
          -0.10140050393753741f,
132
219k
      },
133
219k
      {
134
219k
          0.0,
135
219k
          0.0,
136
219k
          0.40670075830260755f,
137
219k
          0.44444816619734445f,
138
219k
          0.0,
139
219k
          0.0,
140
219k
          0.19574399372042936f,
141
219k
          0.2929100136981264f,
142
219k
          -0.40670075830260716f,
143
219k
          -0.19574399372042872f,
144
219k
          0.0,
145
219k
          0.11379074460448091f,
146
219k
          -0.44444816619734384f,
147
219k
          -0.29291001369812636f,
148
219k
          -0.1137907446044814f,
149
219k
          0.0,
150
219k
      },
151
219k
      {
152
219k
          0.0,
153
219k
          0.0,
154
219k
          -0.21255748058288748f,
155
219k
          0.3085497062849767f,
156
219k
          0.0,
157
219k
          0.4706702258572536f,
158
219k
          -0.1621205195722993f,
159
219k
          0.0,
160
219k
          -0.21255748058287047f,
161
219k
          -0.16212051957228327f,
162
219k
          -0.47067022585725277f,
163
219k
          -0.1464291867126764f,
164
219k
          0.3085497062849487f,
165
219k
          0.0,
166
219k
          -0.14642918671266536f,
167
219k
          0.4251149611657548f,
168
219k
      },
169
219k
      {
170
219k
          0.0,
171
219k
          -0.7071067811865474f,
172
219k
          0.0,
173
219k
          0.0,
174
219k
          0.7071067811865476f,
175
219k
          0.0,
176
219k
          0.0,
177
219k
          0.0,
178
219k
          0.0,
179
219k
          0.0,
180
219k
          0.0,
181
219k
          0.0,
182
219k
          0.0,
183
219k
          0.0,
184
219k
          0.0,
185
219k
          0.0,
186
219k
      },
187
219k
      {
188
219k
          -0.4105377591765233f,
189
219k
          0.6235485373547691f,
190
219k
          -0.06435071657946274f,
191
219k
          -0.06435071657946266f,
192
219k
          0.6235485373547694f,
193
219k
          -0.06435071657946284f,
194
219k
          -0.0643507165794628f,
195
219k
          -0.06435071657946274f,
196
219k
          -0.06435071657946272f,
197
219k
          -0.06435071657946279f,
198
219k
          -0.06435071657946266f,
199
219k
          -0.06435071657946277f,
200
219k
          -0.06435071657946277f,
201
219k
          -0.06435071657946273f,
202
219k
          -0.06435071657946274f,
203
219k
          -0.0643507165794626f,
204
219k
      },
205
219k
      {
206
219k
          0.0,
207
219k
          0.0,
208
219k
          -0.4517556589999482f,
209
219k
          0.15854503551840063f,
210
219k
          0.0,
211
219k
          -0.04038515160822202f,
212
219k
          0.0074182263792423875f,
213
219k
          0.39351034269210167f,
214
219k
          -0.45175565899994635f,
215
219k
          0.007418226379244351f,
216
219k
          0.1107416575309343f,
217
219k
          0.08298163094882051f,
218
219k
          0.15854503551839705f,
219
219k
          0.3935103426921022f,
220
219k
          0.0829816309488214f,
221
219k
          -0.45175565899994796f,
222
219k
      },
223
219k
      {
224
219k
          0.0,
225
219k
          0.0,
226
219k
          -0.304684750724869f,
227
219k
          0.5112616136591823f,
228
219k
          0.0,
229
219k
          0.0,
230
219k
          -0.290480129728998f,
231
219k
          -0.06578701549142804f,
232
219k
          0.304684750724884f,
233
219k
          0.2904801297290076f,
234
219k
          0.0,
235
219k
          -0.23889773523344604f,
236
219k
          -0.5112616136592012f,
237
219k
          0.06578701549142545f,
238
219k
          0.23889773523345467f,
239
219k
          0.0,
240
219k
      },
241
219k
      {
242
219k
          0.0,
243
219k
          0.0,
244
219k
          0.3017929516615495f,
245
219k
          0.25792362796341184f,
246
219k
          0.0,
247
219k
          0.16272340142866204f,
248
219k
          0.09520022653475037f,
249
219k
          0.0,
250
219k
          0.3017929516615503f,
251
219k
          0.09520022653475055f,
252
219k
          -0.16272340142866173f,
253
219k
          -0.35312385449816297f,
254
219k
          0.25792362796341295f,
255
219k
          0.0,
256
219k
          -0.3531238544981624f,
257
219k
          -0.6035859033230976f,
258
219k
      },
259
219k
      {
260
219k
          0.0,
261
219k
          0.0,
262
219k
          0.40824829046386274f,
263
219k
          0.0,
264
219k
          0.0,
265
219k
          0.0,
266
219k
          0.0,
267
219k
          -0.4082482904638628f,
268
219k
          -0.4082482904638635f,
269
219k
          0.0,
270
219k
          0.0,
271
219k
          -0.40824829046386296f,
272
219k
          0.0,
273
219k
          0.4082482904638634f,
274
219k
          0.408248290463863f,
275
219k
          0.0,
276
219k
      },
277
219k
      {
278
219k
          0.0,
279
219k
          0.0,
280
219k
          0.1747866975480809f,
281
219k
          0.0812611176717539f,
282
219k
          0.0,
283
219k
          0.0,
284
219k
          -0.3675398009862027f,
285
219k
          -0.307882213957909f,
286
219k
          -0.17478669754808135f,
287
219k
          0.3675398009862011f,
288
219k
          0.0,
289
219k
          0.4826689115059883f,
290
219k
          -0.08126111767175039f,
291
219k
          0.30788221395790305f,
292
219k
          -0.48266891150598584f,
293
219k
          0.0,
294
219k
      },
295
219k
      {
296
219k
          0.0,
297
219k
          0.0,
298
219k
          -0.21105601049335784f,
299
219k
          0.18567180916109802f,
300
219k
          0.0,
301
219k
          0.0,
302
219k
          0.49215859013738733f,
303
219k
          -0.38525013709251915f,
304
219k
          0.21105601049335806f,
305
219k
          -0.49215859013738905f,
306
219k
          0.0,
307
219k
          0.17419412659916217f,
308
219k
          -0.18567180916109904f,
309
219k
          0.3852501370925211f,
310
219k
          -0.1741941265991621f,
311
219k
          0.0,
312
219k
      },
313
219k
      {
314
219k
          0.0,
315
219k
          0.0,
316
219k
          -0.14266084808807264f,
317
219k
          -0.3416446842253372f,
318
219k
          0.0,
319
219k
          0.7367497537172237f,
320
219k
          0.24627107722075148f,
321
219k
          -0.08574019035519306f,
322
219k
          -0.14266084808807344f,
323
219k
          0.24627107722075137f,
324
219k
          0.14883399227113567f,
325
219k
          -0.04768680350229251f,
326
219k
          -0.3416446842253373f,
327
219k
          -0.08574019035519267f,
328
219k
          -0.047686803502292804f,
329
219k
          -0.14266084808807242f,
330
219k
      },
331
219k
      {
332
219k
          0.0,
333
219k
          0.0,
334
219k
          -0.13813540350758585f,
335
219k
          0.3302282550303788f,
336
219k
          0.0,
337
219k
          0.08755115000587084f,
338
219k
          -0.07946706605909573f,
339
219k
          -0.4613374887461511f,
340
219k
          -0.13813540350758294f,
341
219k
          -0.07946706605910261f,
342
219k
          0.49724647109535086f,
343
219k
          0.12538059448563663f,
344
219k
          0.3302282550303805f,
345
219k
          -0.4613374887461554f,
346
219k
          0.12538059448564315f,
347
219k
          -0.13813540350758452f,
348
219k
      },
349
219k
      {
350
219k
          0.0,
351
219k
          0.0,
352
219k
          -0.17437602599651067f,
353
219k
          0.0702790691196284f,
354
219k
          0.0,
355
219k
          -0.2921026642334881f,
356
219k
          0.3623817333531167f,
357
219k
          0.0,
358
219k
          -0.1743760259965108f,
359
219k
          0.36238173335311646f,
360
219k
          0.29210266423348785f,
361
219k
          -0.4326608024727445f,
362
219k
          0.07027906911962818f,
363
219k
          0.0,
364
219k
          -0.4326608024727457f,
365
219k
          0.34875205199302267f,
366
219k
      },
367
219k
      {
368
219k
          0.0,
369
219k
          0.0,
370
219k
          0.11354987314994337f,
371
219k
          -0.07417504595810355f,
372
219k
          0.0,
373
219k
          0.19402893032594343f,
374
219k
          -0.435190496523228f,
375
219k
          0.21918684838857466f,
376
219k
          0.11354987314994257f,
377
219k
          -0.4351904965232251f,
378
219k
          0.5550443808910661f,
379
219k
          -0.25468277124066463f,
380
219k
          -0.07417504595810233f,
381
219k
          0.2191868483885728f,
382
219k
          -0.25468277124066413f,
383
219k
          0.1135498731499429f,
384
219k
      },
385
219k
  };
386
387
219k
  const HWY_CAPPED(float, 16) d;
388
658k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
438k
    auto pixel = Zero(d);
390
7.45M
    for (size_t j = 0; j < 16; j++) {
391
7.01M
      auto cf = Set(d, coeffs[j]);
392
7.01M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
7.01M
      pixel = MulAdd(cf, basis, pixel);
394
7.01M
    }
395
438k
    Store(pixel, d, pixels + i);
396
438k
  }
397
219k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
// Reconstructs pixels from an AFV-coded block of coefficients.
//
// `coefficients` is read at indices 0..63 with a row stride of 8. `pixels` is
// written with a row stride of `pixels_stride`.
//
// `afv_kind` selects which 4x4 corner receives the AFVIDCT4x4 output: its low
// bit (afv_x) picks the 4-column half and afv_kind / 2 (afv_y) picks the 4-row
// half. The remaining area is produced by a 4x4 IDCT (same rows, other column
// half) and a 4x8 IDCT (the other four rows). The indexing below only stays
// within rows 0..7 for afv_kind in 0..3.
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
4.99M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
4.99M
  // Working memory handed to both ComputeScaledIDCT calls below.
  // NOTE(review): the size required is defined by that helper, which is not
  // visible here - confirm before changing.
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
4.99M
  size_t afv_x = afv_kind & 1;
404
4.99M
  size_t afv_y = afv_kind / 2;
405
4.99M
  // One DC term per sub-transform, derived from the three coefficients at
  // (row 0, col 0), (row 0, col 1) and (row 1, col 0) of the 8-wide layout:
  // dcs[0] feeds the AFV stage, dcs[1] the 4x4 IDCT, dcs[2] the 4x8 IDCT.
  float dcs[3] = {};
406
4.99M
  float block00 = coefficients[0];
407
4.99M
  float block01 = coefficients[1];
408
4.99M
  float block10 = coefficients[8];
409
4.99M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
4.99M
  dcs[1] = (block00 + block10 - block01);
411
4.99M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
  // Gathers the coefficients at even rows and even columns into a dense 4x4
  // array. Slot 0 is skipped in the loop so it keeps dcs[0].
413
4.99M
  HWY_ALIGN float coeff[4 * 4];
414
4.99M
  coeff[0] = dcs[0];
415
24.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
99.9M
    for (size_t ix = 0; ix < 4; ix++) {
417
79.9M
      if (ix == 0 && iy == 0) continue;
418
74.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
74.9M
    }
420
19.9M
  }
421
4.99M
  // Reused by all three stages. It is sized for the 4x8 stage; the AFV and 4x4
  // stages only touch the first 16 entries.
  HWY_ALIGN float block[4 * 8];
422
4.99M
  AFVIDCT4x4(coeff, block);
423
24.9M
  // Copy the 4x4 result into the selected corner, mirroring it horizontally
  // when afv_x == 1 and vertically when afv_y == 1.
  for (size_t iy = 0; iy < 4; iy++) {
424
99.9M
    for (size_t ix = 0; ix < 4; ix++) {
425
79.9M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
79.9M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
79.9M
    }
428
19.9M
  }
429
  // IDCT4x4 in (odd, even) positions.
  // Gathers the coefficients at even rows and odd columns (index ix * 2 + 1)
  // into a dense 4x4 array. Slot 0 is skipped in the loop so it keeps dcs[1].
430
4.99M
  block[0] = dcs[1];
431
24.9M
  for (size_t iy = 0; iy < 4; iy++) {
432
99.9M
    for (size_t ix = 0; ix < 4; ix++) {
433
79.9M
      if (ix == 0 && iy == 0) continue;
434
74.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
74.9M
    }
436
19.9M
  }
437
4.99M
  // The destination starts on the same row as the AFV corner, in the other
  // 4-column half (column 0 when afv_x == 1, otherwise column 4).
  ComputeScaledIDCT<4, 4>()(
438
4.99M
      block,
439
4.99M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
4.99M
            pixels_stride),
441
4.99M
      scratch_space);
442
  // IDCT4x8.
  // Gathers the odd coefficient rows (all 8 columns) into a dense array with a
  // row stride of 8. Slot 0 is skipped in the loop so it keeps dcs[2].
443
4.99M
  block[0] = dcs[2];
444
24.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
179M
    for (size_t ix = 0; ix < 8; ix++) {
446
159M
      if (ix == 0 && iy == 0) continue;
447
154M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
154M
    }
449
19.9M
  }
450
4.99M
  // The destination starts at row 0 when the AFV corner is in the lower half
  // (afv_y == 1), otherwise at row 4; presumably this fills the four
  // full-width rows not covered by the two stages above.
  ComputeScaledIDCT<4, 8>()(
451
4.99M
      block,
452
4.99M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
4.99M
      scratch_space);
454
4.99M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.19M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.19M
  size_t afv_x = afv_kind & 1;
404
1.19M
  size_t afv_y = afv_kind / 2;
405
1.19M
  float dcs[3] = {};
406
1.19M
  float block00 = coefficients[0];
407
1.19M
  float block01 = coefficients[1];
408
1.19M
  float block10 = coefficients[8];
409
1.19M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.19M
  dcs[1] = (block00 + block10 - block01);
411
1.19M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.19M
  HWY_ALIGN float coeff[4 * 4];
414
1.19M
  coeff[0] = dcs[0];
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      if (ix == 0 && iy == 0) continue;
418
17.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
17.9M
    }
420
4.77M
  }
421
1.19M
  HWY_ALIGN float block[4 * 8];
422
1.19M
  AFVIDCT4x4(coeff, block);
423
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
424
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
425
19.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
19.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
19.1M
    }
428
4.77M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.19M
  block[0] = dcs[1];
431
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
432
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
433
19.1M
      if (ix == 0 && iy == 0) continue;
434
17.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
17.9M
    }
436
4.77M
  }
437
1.19M
  ComputeScaledIDCT<4, 4>()(
438
1.19M
      block,
439
1.19M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.19M
            pixels_stride),
441
1.19M
      scratch_space);
442
  // IDCT4x8.
443
1.19M
  block[0] = dcs[2];
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      if (ix == 0 && iy == 0) continue;
447
37.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
37.0M
    }
449
4.77M
  }
450
1.19M
  ComputeScaledIDCT<4, 8>()(
451
1.19M
      block,
452
1.19M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.19M
      scratch_space);
454
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.19M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.19M
  size_t afv_x = afv_kind & 1;
404
1.19M
  size_t afv_y = afv_kind / 2;
405
1.19M
  float dcs[3] = {};
406
1.19M
  float block00 = coefficients[0];
407
1.19M
  float block01 = coefficients[1];
408
1.19M
  float block10 = coefficients[8];
409
1.19M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.19M
  dcs[1] = (block00 + block10 - block01);
411
1.19M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.19M
  HWY_ALIGN float coeff[4 * 4];
414
1.19M
  coeff[0] = dcs[0];
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      if (ix == 0 && iy == 0) continue;
418
17.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
17.9M
    }
420
4.77M
  }
421
1.19M
  HWY_ALIGN float block[4 * 8];
422
1.19M
  AFVIDCT4x4(coeff, block);
423
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
424
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
425
19.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
19.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
19.1M
    }
428
4.77M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.19M
  block[0] = dcs[1];
431
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
432
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
433
19.1M
      if (ix == 0 && iy == 0) continue;
434
17.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
17.9M
    }
436
4.77M
  }
437
1.19M
  ComputeScaledIDCT<4, 4>()(
438
1.19M
      block,
439
1.19M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.19M
            pixels_stride),
441
1.19M
      scratch_space);
442
  // IDCT4x8.
443
1.19M
  block[0] = dcs[2];
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      if (ix == 0 && iy == 0) continue;
447
37.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
37.0M
    }
449
4.77M
  }
450
1.19M
  ComputeScaledIDCT<4, 8>()(
451
1.19M
      block,
452
1.19M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.19M
      scratch_space);
454
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.19M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.19M
  size_t afv_x = afv_kind & 1;
404
1.19M
  size_t afv_y = afv_kind / 2;
405
1.19M
  float dcs[3] = {};
406
1.19M
  float block00 = coefficients[0];
407
1.19M
  float block01 = coefficients[1];
408
1.19M
  float block10 = coefficients[8];
409
1.19M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.19M
  dcs[1] = (block00 + block10 - block01);
411
1.19M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.19M
  HWY_ALIGN float coeff[4 * 4];
414
1.19M
  coeff[0] = dcs[0];
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      if (ix == 0 && iy == 0) continue;
418
17.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
17.9M
    }
420
4.77M
  }
421
1.19M
  HWY_ALIGN float block[4 * 8];
422
1.19M
  AFVIDCT4x4(coeff, block);
423
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
424
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
425
19.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
19.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
19.1M
    }
428
4.77M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.19M
  block[0] = dcs[1];
431
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
432
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
433
19.1M
      if (ix == 0 && iy == 0) continue;
434
17.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
17.9M
    }
436
4.77M
  }
437
1.19M
  ComputeScaledIDCT<4, 4>()(
438
1.19M
      block,
439
1.19M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.19M
            pixels_stride),
441
1.19M
      scratch_space);
442
  // IDCT4x8.
443
1.19M
  block[0] = dcs[2];
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      if (ix == 0 && iy == 0) continue;
447
37.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
37.0M
    }
449
4.77M
  }
450
1.19M
  ComputeScaledIDCT<4, 8>()(
451
1.19M
      block,
452
1.19M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.19M
      scratch_space);
454
1.19M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
1.19M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
1.19M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
1.19M
  size_t afv_x = afv_kind & 1;
404
1.19M
  size_t afv_y = afv_kind / 2;
405
1.19M
  float dcs[3] = {};
406
1.19M
  float block00 = coefficients[0];
407
1.19M
  float block01 = coefficients[1];
408
1.19M
  float block10 = coefficients[8];
409
1.19M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
1.19M
  dcs[1] = (block00 + block10 - block01);
411
1.19M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
1.19M
  HWY_ALIGN float coeff[4 * 4];
414
1.19M
  coeff[0] = dcs[0];
415
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
19.1M
      if (ix == 0 && iy == 0) continue;
418
17.9M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
17.9M
    }
420
4.77M
  }
421
1.19M
  HWY_ALIGN float block[4 * 8];
422
1.19M
  AFVIDCT4x4(coeff, block);
423
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
424
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
425
19.1M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
19.1M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
19.1M
    }
428
4.77M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
1.19M
  block[0] = dcs[1];
431
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
432
23.8M
    for (size_t ix = 0; ix < 4; ix++) {
433
19.1M
      if (ix == 0 && iy == 0) continue;
434
17.9M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
17.9M
    }
436
4.77M
  }
437
1.19M
  ComputeScaledIDCT<4, 4>()(
438
1.19M
      block,
439
1.19M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
1.19M
            pixels_stride),
441
1.19M
      scratch_space);
442
  // IDCT4x8.
443
1.19M
  block[0] = dcs[2];
444
5.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
42.9M
    for (size_t ix = 0; ix < 8; ix++) {
446
38.2M
      if (ix == 0 && iy == 0) continue;
447
37.0M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
37.0M
    }
449
4.77M
  }
450
1.19M
  ComputeScaledIDCT<4, 8>()(
451
1.19M
      block,
452
1.19M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
1.19M
      scratch_space);
454
1.19M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
60.0k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
60.0k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
60.0k
  size_t afv_x = afv_kind & 1;
404
60.0k
  size_t afv_y = afv_kind / 2;
405
60.0k
  float dcs[3] = {};
406
60.0k
  float block00 = coefficients[0];
407
60.0k
  float block01 = coefficients[1];
408
60.0k
  float block10 = coefficients[8];
409
60.0k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
60.0k
  dcs[1] = (block00 + block10 - block01);
411
60.0k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
60.0k
  HWY_ALIGN float coeff[4 * 4];
414
60.0k
  coeff[0] = dcs[0];
415
300k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.20M
    for (size_t ix = 0; ix < 4; ix++) {
417
960k
      if (ix == 0 && iy == 0) continue;
418
900k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
900k
    }
420
240k
  }
421
60.0k
  HWY_ALIGN float block[4 * 8];
422
60.0k
  AFVIDCT4x4(coeff, block);
423
300k
  for (size_t iy = 0; iy < 4; iy++) {
424
1.20M
    for (size_t ix = 0; ix < 4; ix++) {
425
960k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
960k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
960k
    }
428
240k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
60.0k
  block[0] = dcs[1];
431
300k
  for (size_t iy = 0; iy < 4; iy++) {
432
1.20M
    for (size_t ix = 0; ix < 4; ix++) {
433
960k
      if (ix == 0 && iy == 0) continue;
434
900k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
900k
    }
436
240k
  }
437
60.0k
  ComputeScaledIDCT<4, 4>()(
438
60.0k
      block,
439
60.0k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
60.0k
            pixels_stride),
441
60.0k
      scratch_space);
442
  // IDCT4x8.
443
60.0k
  block[0] = dcs[2];
444
300k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.16M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.92M
      if (ix == 0 && iy == 0) continue;
447
1.86M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.86M
    }
449
240k
  }
450
60.0k
  ComputeScaledIDCT<4, 8>()(
451
60.0k
      block,
452
60.0k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
60.0k
      scratch_space);
454
60.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
43.3k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
43.3k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
43.3k
  size_t afv_x = afv_kind & 1;
404
43.3k
  size_t afv_y = afv_kind / 2;
405
43.3k
  float dcs[3] = {};
406
43.3k
  float block00 = coefficients[0];
407
43.3k
  float block01 = coefficients[1];
408
43.3k
  float block10 = coefficients[8];
409
43.3k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
43.3k
  dcs[1] = (block00 + block10 - block01);
411
43.3k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
43.3k
  HWY_ALIGN float coeff[4 * 4];
414
43.3k
  coeff[0] = dcs[0];
415
216k
  for (size_t iy = 0; iy < 4; iy++) {
416
866k
    for (size_t ix = 0; ix < 4; ix++) {
417
692k
      if (ix == 0 && iy == 0) continue;
418
649k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
649k
    }
420
173k
  }
421
43.3k
  HWY_ALIGN float block[4 * 8];
422
43.3k
  AFVIDCT4x4(coeff, block);
423
216k
  for (size_t iy = 0; iy < 4; iy++) {
424
866k
    for (size_t ix = 0; ix < 4; ix++) {
425
692k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
692k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
692k
    }
428
173k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
43.3k
  block[0] = dcs[1];
431
216k
  for (size_t iy = 0; iy < 4; iy++) {
432
866k
    for (size_t ix = 0; ix < 4; ix++) {
433
692k
      if (ix == 0 && iy == 0) continue;
434
649k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
649k
    }
436
173k
  }
437
43.3k
  ComputeScaledIDCT<4, 4>()(
438
43.3k
      block,
439
43.3k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
43.3k
            pixels_stride),
441
43.3k
      scratch_space);
442
  // IDCT4x8.
443
43.3k
  block[0] = dcs[2];
444
216k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.55M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.38M
      if (ix == 0 && iy == 0) continue;
447
1.34M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.34M
    }
449
173k
  }
450
43.3k
  ComputeScaledIDCT<4, 8>()(
451
43.3k
      block,
452
43.3k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
43.3k
      scratch_space);
454
43.3k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
46.9k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
46.9k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
46.9k
  size_t afv_x = afv_kind & 1;
404
46.9k
  size_t afv_y = afv_kind / 2;
405
46.9k
  float dcs[3] = {};
406
46.9k
  float block00 = coefficients[0];
407
46.9k
  float block01 = coefficients[1];
408
46.9k
  float block10 = coefficients[8];
409
46.9k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
46.9k
  dcs[1] = (block00 + block10 - block01);
411
46.9k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
46.9k
  HWY_ALIGN float coeff[4 * 4];
414
46.9k
  coeff[0] = dcs[0];
415
234k
  for (size_t iy = 0; iy < 4; iy++) {
416
939k
    for (size_t ix = 0; ix < 4; ix++) {
417
751k
      if (ix == 0 && iy == 0) continue;
418
704k
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
704k
    }
420
187k
  }
421
46.9k
  HWY_ALIGN float block[4 * 8];
422
46.9k
  AFVIDCT4x4(coeff, block);
423
234k
  for (size_t iy = 0; iy < 4; iy++) {
424
939k
    for (size_t ix = 0; ix < 4; ix++) {
425
751k
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
751k
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
751k
    }
428
187k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
46.9k
  block[0] = dcs[1];
431
234k
  for (size_t iy = 0; iy < 4; iy++) {
432
939k
    for (size_t ix = 0; ix < 4; ix++) {
433
751k
      if (ix == 0 && iy == 0) continue;
434
704k
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
704k
    }
436
187k
  }
437
46.9k
  ComputeScaledIDCT<4, 4>()(
438
46.9k
      block,
439
46.9k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
46.9k
            pixels_stride),
441
46.9k
      scratch_space);
442
  // IDCT4x8.
443
46.9k
  block[0] = dcs[2];
444
234k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.69M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.50M
      if (ix == 0 && iy == 0) continue;
447
1.45M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.45M
    }
449
187k
  }
450
46.9k
  ComputeScaledIDCT<4, 8>()(
451
46.9k
      block,
452
46.9k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
46.9k
      scratch_space);
454
46.9k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
69.0k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
69.0k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
69.0k
  size_t afv_x = afv_kind & 1;
404
69.0k
  size_t afv_y = afv_kind / 2;
405
69.0k
  float dcs[3] = {};
406
69.0k
  float block00 = coefficients[0];
407
69.0k
  float block01 = coefficients[1];
408
69.0k
  float block10 = coefficients[8];
409
69.0k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
69.0k
  dcs[1] = (block00 + block10 - block01);
411
69.0k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
69.0k
  HWY_ALIGN float coeff[4 * 4];
414
69.0k
  coeff[0] = dcs[0];
415
345k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.38M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.10M
      if (ix == 0 && iy == 0) continue;
418
1.03M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
1.03M
    }
420
276k
  }
421
69.0k
  HWY_ALIGN float block[4 * 8];
422
69.0k
  AFVIDCT4x4(coeff, block);
423
345k
  for (size_t iy = 0; iy < 4; iy++) {
424
1.38M
    for (size_t ix = 0; ix < 4; ix++) {
425
1.10M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
1.10M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
1.10M
    }
428
276k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
69.0k
  block[0] = dcs[1];
431
345k
  for (size_t iy = 0; iy < 4; iy++) {
432
1.38M
    for (size_t ix = 0; ix < 4; ix++) {
433
1.10M
      if (ix == 0 && iy == 0) continue;
434
1.03M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
1.03M
    }
436
276k
  }
437
69.0k
  ComputeScaledIDCT<4, 4>()(
438
69.0k
      block,
439
69.0k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
69.0k
            pixels_stride),
441
69.0k
      scratch_space);
442
  // IDCT4x8.
443
69.0k
  block[0] = dcs[2];
444
345k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.48M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.20M
      if (ix == 0 && iy == 0) continue;
447
2.14M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
2.14M
    }
449
276k
  }
450
69.0k
  ComputeScaledIDCT<4, 8>()(
451
69.0k
      block,
452
69.0k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
69.0k
      scratch_space);
454
69.0k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
17.6M
                                        float* scratch_space) {
461
17.6M
  using Type = AcStrategyType;
462
17.6M
  switch (strategy) {
463
1.47M
    case Type::IDENTITY: {
464
1.47M
      float dcs[4] = {};
465
1.47M
      float block00 = coefficients[0];
466
1.47M
      float block01 = coefficients[1];
467
1.47M
      float block10 = coefficients[8];
468
1.47M
      float block11 = coefficients[9];
469
1.47M
      dcs[0] = block00 + block01 + block10 + block11;
470
1.47M
      dcs[1] = block00 + block01 - block10 - block11;
471
1.47M
      dcs[2] = block00 - block01 + block10 - block11;
472
1.47M
      dcs[3] = block00 - block01 - block10 + block11;
473
4.42M
      for (size_t y = 0; y < 2; y++) {
474
8.85M
        for (size_t x = 0; x < 2; x++) {
475
5.90M
          float block_dc = dcs[y * 2 + x];
476
5.90M
          float residual_sum = 0;
477
29.5M
          for (size_t iy = 0; iy < 4; iy++) {
478
118M
            for (size_t ix = 0; ix < 4; ix++) {
479
94.5M
              if (ix == 0 && iy == 0) continue;
480
88.5M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
88.5M
            }
482
23.6M
          }
483
5.90M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
5.90M
              block_dc - residual_sum * (1.0f / 16);
485
29.5M
          for (size_t iy = 0; iy < 4; iy++) {
486
118M
            for (size_t ix = 0; ix < 4; ix++) {
487
94.5M
              if (ix == 1 && iy == 1) continue;
488
88.5M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
88.5M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
88.5M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
88.5M
            }
492
23.6M
          }
493
5.90M
          pixels[y * 4 * pixels_stride + x * 4] =
494
5.90M
              coefficients[(y + 2) * 8 + x + 2] +
495
5.90M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
5.90M
        }
497
2.95M
      }
498
1.47M
      break;
499
0
    }
500
1.33M
    case Type::DCT8X4: {
501
1.33M
      float dcs[2] = {};
502
1.33M
      float block0 = coefficients[0];
503
1.33M
      float block1 = coefficients[8];
504
1.33M
      dcs[0] = block0 + block1;
505
1.33M
      dcs[1] = block0 - block1;
506
4.01M
      for (size_t x = 0; x < 2; x++) {
507
2.67M
        HWY_ALIGN float block[4 * 8];
508
2.67M
        block[0] = dcs[x];
509
13.3M
        for (size_t iy = 0; iy < 4; iy++) {
510
96.2M
          for (size_t ix = 0; ix < 8; ix++) {
511
85.5M
            if (ix == 0 && iy == 0) continue;
512
82.9M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
82.9M
          }
514
10.6M
        }
515
2.67M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.67M
                                  scratch_space);
517
2.67M
      }
518
1.33M
      break;
519
0
    }
520
1.25M
    case Type::DCT4X8: {
521
1.25M
      float dcs[2] = {};
522
1.25M
      float block0 = coefficients[0];
523
1.25M
      float block1 = coefficients[8];
524
1.25M
      dcs[0] = block0 + block1;
525
1.25M
      dcs[1] = block0 - block1;
526
3.77M
      for (size_t y = 0; y < 2; y++) {
527
2.51M
        HWY_ALIGN float block[4 * 8];
528
2.51M
        block[0] = dcs[y];
529
12.5M
        for (size_t iy = 0; iy < 4; iy++) {
530
90.6M
          for (size_t ix = 0; ix < 8; ix++) {
531
80.5M
            if (ix == 0 && iy == 0) continue;
532
78.0M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
78.0M
          }
534
10.0M
        }
535
2.51M
        ComputeScaledIDCT<4, 8>()(
536
2.51M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.51M
            scratch_space);
538
2.51M
      }
539
1.25M
      break;
540
0
    }
541
1.19M
    case Type::DCT4X4: {
542
1.19M
      float dcs[4] = {};
543
1.19M
      float block00 = coefficients[0];
544
1.19M
      float block01 = coefficients[1];
545
1.19M
      float block10 = coefficients[8];
546
1.19M
      float block11 = coefficients[9];
547
1.19M
      dcs[0] = block00 + block01 + block10 + block11;
548
1.19M
      dcs[1] = block00 + block01 - block10 - block11;
549
1.19M
      dcs[2] = block00 - block01 + block10 - block11;
550
1.19M
      dcs[3] = block00 - block01 - block10 + block11;
551
3.59M
      for (size_t y = 0; y < 2; y++) {
552
7.19M
        for (size_t x = 0; x < 2; x++) {
553
4.79M
          HWY_ALIGN float block[4 * 4];
554
4.79M
          block[0] = dcs[y * 2 + x];
555
23.9M
          for (size_t iy = 0; iy < 4; iy++) {
556
95.8M
            for (size_t ix = 0; ix < 4; ix++) {
557
76.7M
              if (ix == 0 && iy == 0) continue;
558
71.9M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
71.9M
            }
560
19.1M
          }
561
4.79M
          ComputeScaledIDCT<4, 4>()(
562
4.79M
              block,
563
4.79M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
4.79M
              scratch_space);
565
4.79M
        }
566
2.39M
      }
567
1.19M
      break;
568
0
    }
569
1.82M
    case Type::DCT2X2: {
570
1.82M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
1.82M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
1.82M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
1.82M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
1.82M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
16.4M
      for (size_t y = 0; y < kBlockDim; y++) {
576
131M
        for (size_t x = 0; x < kBlockDim; x++) {
577
116M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
116M
        }
579
14.6M
      }
580
1.82M
      break;
581
0
    }
582
583k
    case Type::DCT16X16: {
583
583k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
583k
                                  scratch_space);
585
583k
      break;
586
0
    }
587
1.11M
    case Type::DCT16X8: {
588
1.11M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
1.11M
                                 scratch_space);
590
1.11M
      break;
591
0
    }
592
1.12M
    case Type::DCT8X16: {
593
1.12M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
1.12M
                                 scratch_space);
595
1.12M
      break;
596
0
    }
597
6
    case Type::DCT32X8: {
598
6
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
6
                                 scratch_space);
600
6
      break;
601
0
    }
602
27
    case Type::DCT8X32: {
603
27
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
27
                                 scratch_space);
605
27
      break;
606
0
    }
607
220k
    case Type::DCT32X16: {
608
220k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
220k
                                  scratch_space);
610
220k
      break;
611
0
    }
612
227k
    case Type::DCT16X32: {
613
227k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
227k
                                  scratch_space);
615
227k
      break;
616
0
    }
617
143k
    case Type::DCT32X32: {
618
143k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
143k
                                  scratch_space);
620
143k
      break;
621
0
    }
622
1.99M
    case Type::DCT: {
623
1.99M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
1.99M
                                scratch_space);
625
1.99M
      break;
626
0
    }
627
1.25M
    case Type::AFV0: {
628
1.25M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
1.25M
      break;
630
0
    }
631
1.23M
    case Type::AFV1: {
632
1.23M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
1.23M
      break;
634
0
    }
635
1.24M
    case Type::AFV2: {
636
1.24M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
1.24M
      break;
638
0
    }
639
1.26M
    case Type::AFV3: {
640
1.26M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
1.26M
      break;
642
0
    }
643
66.3k
    case Type::DCT64X32: {
644
66.3k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
66.3k
                                  scratch_space);
646
66.3k
      break;
647
0
    }
648
48.3k
    case Type::DCT32X64: {
649
48.3k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
48.3k
                                  scratch_space);
651
48.3k
      break;
652
0
    }
653
20.7k
    case Type::DCT64X64: {
654
20.7k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
20.7k
                                  scratch_space);
656
20.7k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
17.6M
  }
689
17.6M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
15.1M
                                        float* scratch_space) {
461
15.1M
  using Type = AcStrategyType;
462
15.1M
  switch (strategy) {
463
1.19M
    case Type::IDENTITY: {
464
1.19M
      float dcs[4] = {};
465
1.19M
      float block00 = coefficients[0];
466
1.19M
      float block01 = coefficients[1];
467
1.19M
      float block10 = coefficients[8];
468
1.19M
      float block11 = coefficients[9];
469
1.19M
      dcs[0] = block00 + block01 + block10 + block11;
470
1.19M
      dcs[1] = block00 + block01 - block10 - block11;
471
1.19M
      dcs[2] = block00 - block01 + block10 - block11;
472
1.19M
      dcs[3] = block00 - block01 - block10 + block11;
473
3.58M
      for (size_t y = 0; y < 2; y++) {
474
7.16M
        for (size_t x = 0; x < 2; x++) {
475
4.77M
          float block_dc = dcs[y * 2 + x];
476
4.77M
          float residual_sum = 0;
477
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
478
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
479
76.4M
              if (ix == 0 && iy == 0) continue;
480
71.6M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
71.6M
            }
482
19.1M
          }
483
4.77M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
4.77M
              block_dc - residual_sum * (1.0f / 16);
485
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
486
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
487
76.4M
              if (ix == 1 && iy == 1) continue;
488
71.6M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
71.6M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
71.6M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
71.6M
            }
492
19.1M
          }
493
4.77M
          pixels[y * 4 * pixels_stride + x * 4] =
494
4.77M
              coefficients[(y + 2) * 8 + x + 2] +
495
4.77M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
4.77M
        }
497
2.38M
      }
498
1.19M
      break;
499
0
    }
500
1.19M
    case Type::DCT8X4: {
501
1.19M
      float dcs[2] = {};
502
1.19M
      float block0 = coefficients[0];
503
1.19M
      float block1 = coefficients[8];
504
1.19M
      dcs[0] = block0 + block1;
505
1.19M
      dcs[1] = block0 - block1;
506
3.58M
      for (size_t x = 0; x < 2; x++) {
507
2.38M
        HWY_ALIGN float block[4 * 8];
508
2.38M
        block[0] = dcs[x];
509
11.9M
        for (size_t iy = 0; iy < 4; iy++) {
510
85.9M
          for (size_t ix = 0; ix < 8; ix++) {
511
76.4M
            if (ix == 0 && iy == 0) continue;
512
74.0M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
74.0M
          }
514
9.55M
        }
515
2.38M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
2.38M
                                  scratch_space);
517
2.38M
      }
518
1.19M
      break;
519
0
    }
520
1.19M
    case Type::DCT4X8: {
521
1.19M
      float dcs[2] = {};
522
1.19M
      float block0 = coefficients[0];
523
1.19M
      float block1 = coefficients[8];
524
1.19M
      dcs[0] = block0 + block1;
525
1.19M
      dcs[1] = block0 - block1;
526
3.58M
      for (size_t y = 0; y < 2; y++) {
527
2.38M
        HWY_ALIGN float block[4 * 8];
528
2.38M
        block[0] = dcs[y];
529
11.9M
        for (size_t iy = 0; iy < 4; iy++) {
530
85.9M
          for (size_t ix = 0; ix < 8; ix++) {
531
76.4M
            if (ix == 0 && iy == 0) continue;
532
74.0M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
74.0M
          }
534
9.55M
        }
535
2.38M
        ComputeScaledIDCT<4, 8>()(
536
2.38M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
2.38M
            scratch_space);
538
2.38M
      }
539
1.19M
      break;
540
0
    }
541
1.19M
    case Type::DCT4X4: {
542
1.19M
      float dcs[4] = {};
543
1.19M
      float block00 = coefficients[0];
544
1.19M
      float block01 = coefficients[1];
545
1.19M
      float block10 = coefficients[8];
546
1.19M
      float block11 = coefficients[9];
547
1.19M
      dcs[0] = block00 + block01 + block10 + block11;
548
1.19M
      dcs[1] = block00 + block01 - block10 - block11;
549
1.19M
      dcs[2] = block00 - block01 + block10 - block11;
550
1.19M
      dcs[3] = block00 - block01 - block10 + block11;
551
3.58M
      for (size_t y = 0; y < 2; y++) {
552
7.16M
        for (size_t x = 0; x < 2; x++) {
553
4.77M
          HWY_ALIGN float block[4 * 4];
554
4.77M
          block[0] = dcs[y * 2 + x];
555
23.8M
          for (size_t iy = 0; iy < 4; iy++) {
556
95.5M
            for (size_t ix = 0; ix < 4; ix++) {
557
76.4M
              if (ix == 0 && iy == 0) continue;
558
71.6M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
71.6M
            }
560
19.1M
          }
561
4.77M
          ComputeScaledIDCT<4, 4>()(
562
4.77M
              block,
563
4.77M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
4.77M
              scratch_space);
565
4.77M
        }
566
2.38M
      }
567
1.19M
      break;
568
0
    }
569
1.19M
    case Type::DCT2X2: {
570
1.19M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
1.19M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
1.19M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
1.19M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
1.19M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
10.7M
      for (size_t y = 0; y < kBlockDim; y++) {
576
85.9M
        for (size_t x = 0; x < kBlockDim; x++) {
577
76.4M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
76.4M
        }
579
9.55M
      }
580
1.19M
      break;
581
0
    }
582
525k
    case Type::DCT16X16: {
583
525k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
525k
                                  scratch_space);
585
525k
      break;
586
0
    }
587
1.03M
    case Type::DCT16X8: {
588
1.03M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
1.03M
                                 scratch_space);
590
1.03M
      break;
591
0
    }
592
1.03M
    case Type::DCT8X16: {
593
1.03M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
1.03M
                                 scratch_space);
595
1.03M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
203k
    case Type::DCT32X16: {
608
203k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
203k
                                  scratch_space);
610
203k
      break;
611
0
    }
612
206k
    case Type::DCT16X32: {
613
206k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
206k
                                  scratch_space);
615
206k
      break;
616
0
    }
617
106k
    case Type::DCT32X32: {
618
106k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
106k
                                  scratch_space);
620
106k
      break;
621
0
    }
622
1.19M
    case Type::DCT: {
623
1.19M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
1.19M
                                scratch_space);
625
1.19M
      break;
626
0
    }
627
1.19M
    case Type::AFV0: {
628
1.19M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
1.19M
      break;
630
0
    }
631
1.19M
    case Type::AFV1: {
632
1.19M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
1.19M
      break;
634
0
    }
635
1.19M
    case Type::AFV2: {
636
1.19M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
1.19M
      break;
638
0
    }
639
1.19M
    case Type::AFV3: {
640
1.19M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
1.19M
      break;
642
0
    }
643
65.8k
    case Type::DCT64X32: {
644
65.8k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
65.8k
                                  scratch_space);
646
65.8k
      break;
647
0
    }
648
48.0k
    case Type::DCT32X64: {
649
48.0k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
48.0k
                                  scratch_space);
651
48.0k
      break;
652
0
    }
653
17.0k
    case Type::DCT64X64: {
654
17.0k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
17.0k
                                  scratch_space);
656
17.0k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
15.1M
  }
689
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
2.44M
                                        float* scratch_space) {
461
2.44M
  using Type = AcStrategyType;
462
2.44M
  switch (strategy) {
463
282k
    case Type::IDENTITY: {
464
282k
      float dcs[4] = {};
465
282k
      float block00 = coefficients[0];
466
282k
      float block01 = coefficients[1];
467
282k
      float block10 = coefficients[8];
468
282k
      float block11 = coefficients[9];
469
282k
      dcs[0] = block00 + block01 + block10 + block11;
470
282k
      dcs[1] = block00 + block01 - block10 - block11;
471
282k
      dcs[2] = block00 - block01 + block10 - block11;
472
282k
      dcs[3] = block00 - block01 - block10 + block11;
473
846k
      for (size_t y = 0; y < 2; y++) {
474
1.69M
        for (size_t x = 0; x < 2; x++) {
475
1.12M
          float block_dc = dcs[y * 2 + x];
476
1.12M
          float residual_sum = 0;
477
5.64M
          for (size_t iy = 0; iy < 4; iy++) {
478
22.5M
            for (size_t ix = 0; ix < 4; ix++) {
479
18.0M
              if (ix == 0 && iy == 0) continue;
480
16.9M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
16.9M
            }
482
4.51M
          }
483
1.12M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
1.12M
              block_dc - residual_sum * (1.0f / 16);
485
5.64M
          for (size_t iy = 0; iy < 4; iy++) {
486
22.5M
            for (size_t ix = 0; ix < 4; ix++) {
487
18.0M
              if (ix == 1 && iy == 1) continue;
488
16.9M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
16.9M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
16.9M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
16.9M
            }
492
4.51M
          }
493
1.12M
          pixels[y * 4 * pixels_stride + x * 4] =
494
1.12M
              coefficients[(y + 2) * 8 + x + 2] +
495
1.12M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
1.12M
        }
497
564k
      }
498
282k
      break;
499
0
    }
500
143k
    case Type::DCT8X4: {
501
143k
      float dcs[2] = {};
502
143k
      float block0 = coefficients[0];
503
143k
      float block1 = coefficients[8];
504
143k
      dcs[0] = block0 + block1;
505
143k
      dcs[1] = block0 - block1;
506
429k
      for (size_t x = 0; x < 2; x++) {
507
286k
        HWY_ALIGN float block[4 * 8];
508
286k
        block[0] = dcs[x];
509
1.43M
        for (size_t iy = 0; iy < 4; iy++) {
510
10.3M
          for (size_t ix = 0; ix < 8; ix++) {
511
9.15M
            if (ix == 0 && iy == 0) continue;
512
8.87M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
8.87M
          }
514
1.14M
        }
515
286k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
286k
                                  scratch_space);
517
286k
      }
518
143k
      break;
519
0
    }
520
64.4k
    case Type::DCT4X8: {
521
64.4k
      float dcs[2] = {};
522
64.4k
      float block0 = coefficients[0];
523
64.4k
      float block1 = coefficients[8];
524
64.4k
      dcs[0] = block0 + block1;
525
64.4k
      dcs[1] = block0 - block1;
526
193k
      for (size_t y = 0; y < 2; y++) {
527
128k
        HWY_ALIGN float block[4 * 8];
528
128k
        block[0] = dcs[y];
529
644k
        for (size_t iy = 0; iy < 4; iy++) {
530
4.64M
          for (size_t ix = 0; ix < 8; ix++) {
531
4.12M
            if (ix == 0 && iy == 0) continue;
532
3.99M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
3.99M
          }
534
515k
        }
535
128k
        ComputeScaledIDCT<4, 8>()(
536
128k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
128k
            scratch_space);
538
128k
      }
539
64.4k
      break;
540
0
    }
541
4.16k
    case Type::DCT4X4: {
542
4.16k
      float dcs[4] = {};
543
4.16k
      float block00 = coefficients[0];
544
4.16k
      float block01 = coefficients[1];
545
4.16k
      float block10 = coefficients[8];
546
4.16k
      float block11 = coefficients[9];
547
4.16k
      dcs[0] = block00 + block01 + block10 + block11;
548
4.16k
      dcs[1] = block00 + block01 - block10 - block11;
549
4.16k
      dcs[2] = block00 - block01 + block10 - block11;
550
4.16k
      dcs[3] = block00 - block01 - block10 + block11;
551
12.4k
      for (size_t y = 0; y < 2; y++) {
552
24.9k
        for (size_t x = 0; x < 2; x++) {
553
16.6k
          HWY_ALIGN float block[4 * 4];
554
16.6k
          block[0] = dcs[y * 2 + x];
555
83.2k
          for (size_t iy = 0; iy < 4; iy++) {
556
333k
            for (size_t ix = 0; ix < 4; ix++) {
557
266k
              if (ix == 0 && iy == 0) continue;
558
249k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
249k
            }
560
66.6k
          }
561
16.6k
          ComputeScaledIDCT<4, 4>()(
562
16.6k
              block,
563
16.6k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
16.6k
              scratch_space);
565
16.6k
        }
566
8.32k
      }
567
4.16k
      break;
568
0
    }
569
633k
    case Type::DCT2X2: {
570
633k
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
633k
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
633k
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
633k
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
633k
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
5.69M
      for (size_t y = 0; y < kBlockDim; y++) {
576
45.5M
        for (size_t x = 0; x < kBlockDim; x++) {
577
40.5M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
40.5M
        }
579
5.06M
      }
580
633k
      break;
581
0
    }
582
58.0k
    case Type::DCT16X16: {
583
58.0k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
58.0k
                                  scratch_space);
585
58.0k
      break;
586
0
    }
587
79.2k
    case Type::DCT16X8: {
588
79.2k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
79.2k
                                 scratch_space);
590
79.2k
      break;
591
0
    }
592
83.9k
    case Type::DCT8X16: {
593
83.9k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
83.9k
                                 scratch_space);
595
83.9k
      break;
596
0
    }
597
6
    case Type::DCT32X8: {
598
6
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
6
                                 scratch_space);
600
6
      break;
601
0
    }
602
27
    case Type::DCT8X32: {
603
27
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
27
                                 scratch_space);
605
27
      break;
606
0
    }
607
16.7k
    case Type::DCT32X16: {
608
16.7k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
16.7k
                                  scratch_space);
610
16.7k
      break;
611
0
    }
612
20.8k
    case Type::DCT16X32: {
613
20.8k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
20.8k
                                  scratch_space);
615
20.8k
      break;
616
0
    }
617
37.0k
    case Type::DCT32X32: {
618
37.0k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
37.0k
                                  scratch_space);
620
37.0k
      break;
621
0
    }
622
801k
    case Type::DCT: {
623
801k
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
801k
                                scratch_space);
625
801k
      break;
626
0
    }
627
60.0k
    case Type::AFV0: {
628
60.0k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
60.0k
      break;
630
0
    }
631
43.3k
    case Type::AFV1: {
632
43.3k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
43.3k
      break;
634
0
    }
635
46.9k
    case Type::AFV2: {
636
46.9k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
46.9k
      break;
638
0
    }
639
69.0k
    case Type::AFV3: {
640
69.0k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
69.0k
      break;
642
0
    }
643
489
    case Type::DCT64X32: {
644
489
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
489
                                  scratch_space);
646
489
      break;
647
0
    }
648
375
    case Type::DCT32X64: {
649
375
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
375
                                  scratch_space);
651
375
      break;
652
0
    }
653
3.69k
    case Type::DCT64X64: {
654
3.69k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
3.69k
                                  scratch_space);
656
3.69k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
2.44M
  }
689
2.44M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
2.45M
                                              float* JXL_RESTRICT scratch) {
695
2.45M
  using Type = AcStrategyType;
696
2.45M
  HWY_ALIGN float warm_block[4 * 4];
697
2.45M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
2.45M
  switch (strategy) {
699
79.2k
    case Type::DCT16X8: {
700
79.2k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
79.2k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
79.2k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
79.2k
      break;
704
0
    }
705
83.9k
    case Type::DCT8X16: {
706
83.9k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
83.9k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
83.9k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
83.9k
      break;
710
0
    }
711
58.0k
    case Type::DCT16X16: {
712
58.0k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
58.0k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
58.0k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
58.0k
      break;
716
0
    }
717
6
    case Type::DCT32X8: {
718
6
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
6
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
6
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
6
      break;
722
0
    }
723
27
    case Type::DCT8X32: {
724
27
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
27
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
27
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
27
      break;
728
0
    }
729
16.7k
    case Type::DCT32X16: {
730
16.7k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
16.7k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
16.7k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
16.7k
      break;
734
0
    }
735
20.8k
    case Type::DCT16X32: {
736
20.8k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
20.8k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
20.8k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
20.8k
      break;
740
0
    }
741
37.0k
    case Type::DCT32X32: {
742
37.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
37.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
37.0k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
37.0k
      break;
746
0
    }
747
489
    case Type::DCT64X32: {
748
489
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
489
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
489
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
489
      break;
752
0
    }
753
375
    case Type::DCT32X64: {
754
375
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
375
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
375
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
375
      break;
758
0
    }
759
3.69k
    case Type::DCT64X64: {
760
3.69k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
3.69k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
3.69k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
3.69k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
804k
    case Type::DCT:
806
1.43M
    case Type::DCT2X2:
807
1.44M
    case Type::DCT4X4:
808
1.50M
    case Type::DCT4X8:
809
1.64M
    case Type::DCT8X4:
810
1.70M
    case Type::AFV0:
811
1.75M
    case Type::AFV1:
812
1.79M
    case Type::AFV2:
813
1.86M
    case Type::AFV3:
814
2.15M
    case Type::IDENTITY:
815
2.15M
      llf[0] = dc[0];
816
2.15M
      break;
817
2.45M
  };
818
2.45M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
2.45M
                                              float* JXL_RESTRICT scratch) {
695
2.45M
  using Type = AcStrategyType;
696
2.45M
  HWY_ALIGN float warm_block[4 * 4];
697
2.45M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
2.45M
  switch (strategy) {
699
79.2k
    case Type::DCT16X8: {
700
79.2k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
79.2k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
79.2k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
79.2k
      break;
704
0
    }
705
83.9k
    case Type::DCT8X16: {
706
83.9k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
83.9k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
83.9k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
83.9k
      break;
710
0
    }
711
58.0k
    case Type::DCT16X16: {
712
58.0k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
58.0k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
58.0k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
58.0k
      break;
716
0
    }
717
6
    case Type::DCT32X8: {
718
6
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
6
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
6
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
6
      break;
722
0
    }
723
27
    case Type::DCT8X32: {
724
27
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
27
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
27
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
27
      break;
728
0
    }
729
16.7k
    case Type::DCT32X16: {
730
16.7k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
16.7k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
16.7k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
16.7k
      break;
734
0
    }
735
20.8k
    case Type::DCT16X32: {
736
20.8k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
20.8k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
20.8k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
20.8k
      break;
740
0
    }
741
37.0k
    case Type::DCT32X32: {
742
37.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
37.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
37.0k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
37.0k
      break;
746
0
    }
747
489
    case Type::DCT64X32: {
748
489
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
489
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
489
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
489
      break;
752
0
    }
753
375
    case Type::DCT32X64: {
754
375
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
375
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
375
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
375
      break;
758
0
    }
759
3.69k
    case Type::DCT64X64: {
760
3.69k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
3.69k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
3.69k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
3.69k
      break;
764
0
    }
765
0
    case Type::DCT128X64: {
766
0
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
0
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
0
      break;
770
0
    }
771
0
    case Type::DCT64X128: {
772
0
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
0
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
0
      break;
776
0
    }
777
0
    case Type::DCT128X128: {
778
0
      ReinterpretingDCT<
779
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
0
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
0
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
804k
    case Type::DCT:
806
1.43M
    case Type::DCT2X2:
807
1.44M
    case Type::DCT4X4:
808
1.50M
    case Type::DCT4X8:
809
1.64M
    case Type::DCT8X4:
810
1.70M
    case Type::AFV0:
811
1.75M
    case Type::AFV1:
812
1.79M
    case Type::AFV2:
813
1.86M
    case Type::AFV3:
814
2.15M
    case Type::IDENTITY:
815
2.15M
      llf[0] = dc[0];
816
2.15M
      break;
817
2.45M
  };
818
2.45M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_