Coverage Report

Created: 2026-06-07 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
//
// Parameters:
//   input, input_stride   - source samples, wrapped via DCTFrom(input,
//                           input_stride) and handed to ComputeScaledDCT.
//   output, output_stride - destination; element (y, x) of the result is
//                           written to output[y * output_stride + x].
//   block                 - receives the ROWS*COLS transform result, which is
//                           then read back when filling `output`.
//   scratch_space         - forwarded unchanged to ComputeScaledDCT
//                           (presumably temporary working memory; required
//                           size is not visible here - TODO confirm).
//
// Template constraints: the static_asserts below reject any instantiation where
// LF_ROWS != ROWS or LF_COLS != COLS.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
3.02M
                                  float* JXL_RESTRICT scratch_space) {
41
3.02M
  static_assert(LF_ROWS == ROWS,
42
3.02M
                "ReinterpretingDCT should only be called with LF == N");
43
3.02M
  static_assert(LF_COLS == COLS,
44
3.02M
                "ReinterpretingDCT should only be called with LF == N");
45
3.02M
  // Run the ROWS*COLS DCT on the input; its coefficients are read from `block`
  // in the loops below.
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3.02M
                                 scratch_space);
47
3.02M
  // Both branches copy every coefficient from `block` to `output`, multiplying
  // it by one DCTTotalResampleScale factor per axis. They differ only in the row
  // pitch used to index `block` and in which dimension drives the outer loop.
  if (ROWS < COLS) {
48
2.24M
    // ROWS < COLS: `block` is indexed with row pitch COLS; y spans LF_ROWS and
    // x spans LF_COLS.
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.94M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.70M
        output[y * output_stride + x] =
51
3.70M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.70M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.70M
      }
54
1.24M
    }
55
2.02M
  } else {
    // ROWS >= COLS: `block` is indexed with row pitch ROWS, y spans LF_COLS and
    // x spans LF_ROWS, and the scale factors swap accordingly.
    // NOTE(review): presumably ComputeScaledDCT stores its result transposed in
    // this case - confirm against dct-inl.h.
56
7.17M
    for (size_t y = 0; y < LF_COLS; y++) {
57
28.5M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
23.3M
        output[y * output_stride + x] =
59
23.3M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
23.3M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
23.3M
      }
62
5.15M
    }
63
2.02M
  }
64
3.02M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
750k
                                  float* JXL_RESTRICT scratch_space) {
41
750k
  static_assert(LF_ROWS == ROWS,
42
750k
                "ReinterpretingDCT should only be called with LF == N");
43
750k
  static_assert(LF_COLS == COLS,
44
750k
                "ReinterpretingDCT should only be called with LF == N");
45
750k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
750k
                                 scratch_space);
47
750k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
750k
  } else {
56
1.50M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.25M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.50M
        output[y * output_stride + x] =
59
1.50M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.50M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.50M
      }
62
750k
    }
63
750k
  }
64
750k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
800k
                                  float* JXL_RESTRICT scratch_space) {
41
800k
  static_assert(LF_ROWS == ROWS,
42
800k
                "ReinterpretingDCT should only be called with LF == N");
43
800k
  static_assert(LF_COLS == COLS,
44
800k
                "ReinterpretingDCT should only be called with LF == N");
45
800k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
800k
                                 scratch_space);
47
800k
  if (ROWS < COLS) {
48
1.60M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.40M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.60M
        output[y * output_stride + x] =
51
1.60M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.60M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.60M
      }
54
800k
    }
55
800k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
800k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
554k
                                  float* JXL_RESTRICT scratch_space) {
41
554k
  static_assert(LF_ROWS == ROWS,
42
554k
                "ReinterpretingDCT should only be called with LF == N");
43
554k
  static_assert(LF_COLS == COLS,
44
554k
                "ReinterpretingDCT should only be called with LF == N");
45
554k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
554k
                                 scratch_space);
47
554k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
554k
  } else {
56
1.66M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.32M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.21M
        output[y * output_stride + x] =
59
2.21M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.21M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.21M
      }
62
1.10M
    }
63
554k
  }
64
554k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
42
                                  float* JXL_RESTRICT scratch_space) {
41
42
  static_assert(LF_ROWS == ROWS,
42
42
                "ReinterpretingDCT should only be called with LF == N");
43
42
  static_assert(LF_COLS == COLS,
44
42
                "ReinterpretingDCT should only be called with LF == N");
45
42
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
42
                                 scratch_space);
47
42
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
42
  } else {
56
84
    for (size_t y = 0; y < LF_COLS; y++) {
57
210
      for (size_t x = 0; x < LF_ROWS; x++) {
58
168
        output[y * output_stride + x] =
59
168
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
168
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
168
      }
62
42
    }
63
42
  }
64
42
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
144
                                  float* JXL_RESTRICT scratch_space) {
41
144
  static_assert(LF_ROWS == ROWS,
42
144
                "ReinterpretingDCT should only be called with LF == N");
43
144
  static_assert(LF_COLS == COLS,
44
144
                "ReinterpretingDCT should only be called with LF == N");
45
144
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
144
                                 scratch_space);
47
144
  if (ROWS < COLS) {
48
288
    for (size_t y = 0; y < LF_ROWS; y++) {
49
720
      for (size_t x = 0; x < LF_COLS; x++) {
50
576
        output[y * output_stride + x] =
51
576
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
576
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
576
      }
54
144
    }
55
144
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
144
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
174k
                                  float* JXL_RESTRICT scratch_space) {
41
174k
  static_assert(LF_ROWS == ROWS,
42
174k
                "ReinterpretingDCT should only be called with LF == N");
43
174k
  static_assert(LF_COLS == COLS,
44
174k
                "ReinterpretingDCT should only be called with LF == N");
45
174k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
174k
                                 scratch_space);
47
174k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
174k
  } else {
56
523k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.74M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.39M
        output[y * output_stride + x] =
59
1.39M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.39M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.39M
      }
62
348k
    }
63
174k
  }
64
174k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
182k
                                  float* JXL_RESTRICT scratch_space) {
41
182k
  static_assert(LF_ROWS == ROWS,
42
182k
                "ReinterpretingDCT should only be called with LF == N");
43
182k
  static_assert(LF_COLS == COLS,
44
182k
                "ReinterpretingDCT should only be called with LF == N");
45
182k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
182k
                                 scratch_space);
47
182k
  if (ROWS < COLS) {
48
548k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.82M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.46M
        output[y * output_stride + x] =
51
1.46M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.46M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.46M
      }
54
365k
    }
55
182k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
182k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
330k
                                  float* JXL_RESTRICT scratch_space) {
41
330k
  static_assert(LF_ROWS == ROWS,
42
330k
                "ReinterpretingDCT should only be called with LF == N");
43
330k
  static_assert(LF_COLS == COLS,
44
330k
                "ReinterpretingDCT should only be called with LF == N");
45
330k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
330k
                                 scratch_space);
47
330k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
330k
  } else {
56
1.65M
    for (size_t y = 0; y < LF_COLS; y++) {
57
6.61M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
5.29M
        output[y * output_stride + x] =
59
5.29M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
5.29M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
5.29M
      }
62
1.32M
    }
63
330k
  }
64
330k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
23.5k
                                  float* JXL_RESTRICT scratch_space) {
41
23.5k
  static_assert(LF_ROWS == ROWS,
42
23.5k
                "ReinterpretingDCT should only be called with LF == N");
43
23.5k
  static_assert(LF_COLS == COLS,
44
23.5k
                "ReinterpretingDCT should only be called with LF == N");
45
23.5k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
23.5k
                                 scratch_space);
47
23.5k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
23.5k
  } else {
56
117k
    for (size_t y = 0; y < LF_COLS; y++) {
57
846k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
752k
        output[y * output_stride + x] =
59
752k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
752k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
752k
      }
62
94.0k
    }
63
23.5k
  }
64
23.5k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
19.9k
                                  float* JXL_RESTRICT scratch_space) {
41
19.9k
  static_assert(LF_ROWS == ROWS,
42
19.9k
                "ReinterpretingDCT should only be called with LF == N");
43
19.9k
  static_assert(LF_COLS == COLS,
44
19.9k
                "ReinterpretingDCT should only be called with LF == N");
45
19.9k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
19.9k
                                 scratch_space);
47
19.9k
  if (ROWS < COLS) {
48
99.6k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
717k
      for (size_t x = 0; x < LF_COLS; x++) {
50
637k
        output[y * output_stride + x] =
51
637k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
637k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
637k
      }
54
79.6k
    }
55
19.9k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
19.9k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
190k
                                  float* JXL_RESTRICT scratch_space) {
41
190k
  static_assert(LF_ROWS == ROWS,
42
190k
                "ReinterpretingDCT should only be called with LF == N");
43
190k
  static_assert(LF_COLS == COLS,
44
190k
                "ReinterpretingDCT should only be called with LF == N");
45
190k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
190k
                                 scratch_space);
47
190k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
190k
  } else {
56
1.71M
    for (size_t y = 0; y < LF_COLS; y++) {
57
13.7M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
12.2M
        output[y * output_stride + x] =
59
12.2M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
12.2M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
12.2M
      }
62
1.52M
    }
63
190k
  }
64
190k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
27
    for (size_t y = 0; y < LF_ROWS; y++) {
49
408
      for (size_t x = 0; x < LF_COLS; x++) {
50
384
        output[y * output_stride + x] =
51
384
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
384
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
384
      }
54
24
    }
55
3
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
18
                                  float* JXL_RESTRICT scratch_space) {
41
18
  static_assert(LF_ROWS == ROWS,
42
18
                "ReinterpretingDCT should only be called with LF == N");
43
18
  static_assert(LF_COLS == COLS,
44
18
                "ReinterpretingDCT should only be called with LF == N");
45
18
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
18
                                 scratch_space);
47
18
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
18
  } else {
56
306
    for (size_t y = 0; y < LF_COLS; y++) {
57
4.89k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.60k
        output[y * output_stride + x] =
59
4.60k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.60k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.60k
      }
62
288
    }
63
18
  }
64
18
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
64.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
64.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
64.0M
  static_assert(S % 2 == 0, "S should be even");
70
64.0M
  float temp[kDCTBlockSize];
71
64.0M
  constexpr size_t num_2x2 = S / 2;
72
213M
  for (size_t y = 0; y < num_2x2; y++) {
73
598M
    for (size_t x = 0; x < num_2x2; x++) {
74
448M
      float c00 = block[y * kBlockDim + x];
75
448M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
448M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
448M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
448M
      float r00 = c00 + c01 + c10 + c11;
79
448M
      float r01 = c00 + c01 - c10 - c11;
80
448M
      float r10 = c00 - c01 + c10 - c11;
81
448M
      float r11 = c00 - c01 - c10 + c11;
82
448M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
448M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
448M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
448M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
448M
    }
87
149M
  }
88
363M
  for (size_t y = 0; y < S; y++) {
89
2.09G
    for (size_t x = 0; x < S; x++) {
90
1.79G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.79G
    }
92
299M
  }
93
64.0M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
29.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
29.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.6M
      float c00 = block[y * kBlockDim + x];
75
14.6M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
14.6M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
14.6M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
14.6M
      float r00 = c00 + c01 + c10 + c11;
79
14.6M
      float r01 = c00 + c01 - c10 - c11;
80
14.6M
      float r10 = c00 - c01 + c10 - c11;
81
14.6M
      float r11 = c00 - c01 - c10 + c11;
82
14.6M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
14.6M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
14.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
14.6M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
14.6M
    }
87
14.6M
  }
88
43.8M
  for (size_t y = 0; y < S; y++) {
89
87.6M
    for (size_t x = 0; x < S; x++) {
90
58.4M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
58.4M
    }
92
29.2M
  }
93
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
43.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
87.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
58.4M
      float c00 = block[y * kBlockDim + x];
75
58.4M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
58.4M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
58.4M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
58.4M
      float r00 = c00 + c01 + c10 + c11;
79
58.4M
      float r01 = c00 + c01 - c10 - c11;
80
58.4M
      float r10 = c00 - c01 + c10 - c11;
81
58.4M
      float r11 = c00 - c01 - c10 + c11;
82
58.4M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
58.4M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
58.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
58.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
58.4M
    }
87
29.2M
  }
88
73.0M
  for (size_t y = 0; y < S; y++) {
89
292M
    for (size_t x = 0; x < S; x++) {
90
233M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
233M
    }
92
58.4M
  }
93
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
73.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
292M
    for (size_t x = 0; x < num_2x2; x++) {
74
233M
      float c00 = block[y * kBlockDim + x];
75
233M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
233M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
233M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
233M
      float r00 = c00 + c01 + c10 + c11;
79
233M
      float r01 = c00 + c01 - c10 - c11;
80
233M
      float r10 = c00 - c01 + c10 - c11;
81
233M
      float r11 = c00 - c01 - c10 + c11;
82
233M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
233M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
233M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
233M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
233M
    }
87
58.4M
  }
88
131M
  for (size_t y = 0; y < S; y++) {
89
1.05G
    for (size_t x = 0; x < S; x++) {
90
934M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
934M
    }
92
116M
  }
93
14.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
13.5M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
6.75M
      float c00 = block[y * kBlockDim + x];
75
6.75M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
6.75M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
6.75M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
6.75M
      float r00 = c00 + c01 + c10 + c11;
79
6.75M
      float r01 = c00 + c01 - c10 - c11;
80
6.75M
      float r10 = c00 - c01 + c10 - c11;
81
6.75M
      float r11 = c00 - c01 - c10 + c11;
82
6.75M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
6.75M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
6.75M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
6.75M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
6.75M
    }
87
6.75M
  }
88
20.2M
  for (size_t y = 0; y < S; y++) {
89
40.5M
    for (size_t x = 0; x < S; x++) {
90
27.0M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
27.0M
    }
92
13.5M
  }
93
6.75M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
20.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
40.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
27.0M
      float c00 = block[y * kBlockDim + x];
75
27.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
27.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
27.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
27.0M
      float r00 = c00 + c01 + c10 + c11;
79
27.0M
      float r01 = c00 + c01 - c10 - c11;
80
27.0M
      float r10 = c00 - c01 + c10 - c11;
81
27.0M
      float r11 = c00 - c01 - c10 + c11;
82
27.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
27.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
27.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
27.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
27.0M
    }
87
13.5M
  }
88
33.7M
  for (size_t y = 0; y < S; y++) {
89
135M
    for (size_t x = 0; x < S; x++) {
90
108M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
108M
    }
92
27.0M
  }
93
6.75M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
33.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
135M
    for (size_t x = 0; x < num_2x2; x++) {
74
108M
      float c00 = block[y * kBlockDim + x];
75
108M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
108M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
108M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
108M
      float r00 = c00 + c01 + c10 + c11;
79
108M
      float r01 = c00 + c01 - c10 - c11;
80
108M
      float r10 = c00 - c01 + c10 - c11;
81
108M
      float r11 = c00 - c01 - c10 + c11;
82
108M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
108M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
108M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
108M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
108M
    }
87
27.0M
  }
88
60.7M
  for (size_t y = 0; y < S; y++) {
89
486M
    for (size_t x = 0; x < S; x++) {
90
432M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
432M
    }
92
54.0M
  }
93
6.75M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
59.4M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
59.4M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
59.4M
      {
98
59.4M
          0.25,
99
59.4M
          0.25,
100
59.4M
          0.25,
101
59.4M
          0.25,
102
59.4M
          0.25,
103
59.4M
          0.25,
104
59.4M
          0.25,
105
59.4M
          0.25,
106
59.4M
          0.25,
107
59.4M
          0.25,
108
59.4M
          0.25,
109
59.4M
          0.25,
110
59.4M
          0.25,
111
59.4M
          0.25,
112
59.4M
          0.25,
113
59.4M
          0.25,
114
59.4M
      },
115
59.4M
      {
116
59.4M
          0.876902929799142f,
117
59.4M
          0.2206518106944235f,
118
59.4M
          -0.10140050393753763f,
119
59.4M
          -0.1014005039375375f,
120
59.4M
          0.2206518106944236f,
121
59.4M
          -0.10140050393753777f,
122
59.4M
          -0.10140050393753772f,
123
59.4M
          -0.10140050393753763f,
124
59.4M
          -0.10140050393753758f,
125
59.4M
          -0.10140050393753769f,
126
59.4M
          -0.1014005039375375f,
127
59.4M
          -0.10140050393753768f,
128
59.4M
          -0.10140050393753768f,
129
59.4M
          -0.10140050393753759f,
130
59.4M
          -0.10140050393753763f,
131
59.4M
          -0.10140050393753741f,
132
59.4M
      },
133
59.4M
      {
134
59.4M
          0.0,
135
59.4M
          0.0,
136
59.4M
          0.40670075830260755f,
137
59.4M
          0.44444816619734445f,
138
59.4M
          0.0,
139
59.4M
          0.0,
140
59.4M
          0.19574399372042936f,
141
59.4M
          0.2929100136981264f,
142
59.4M
          -0.40670075830260716f,
143
59.4M
          -0.19574399372042872f,
144
59.4M
          0.0,
145
59.4M
          0.11379074460448091f,
146
59.4M
          -0.44444816619734384f,
147
59.4M
          -0.29291001369812636f,
148
59.4M
          -0.1137907446044814f,
149
59.4M
          0.0,
150
59.4M
      },
151
59.4M
      {
152
59.4M
          0.0,
153
59.4M
          0.0,
154
59.4M
          -0.21255748058288748f,
155
59.4M
          0.3085497062849767f,
156
59.4M
          0.0,
157
59.4M
          0.4706702258572536f,
158
59.4M
          -0.1621205195722993f,
159
59.4M
          0.0,
160
59.4M
          -0.21255748058287047f,
161
59.4M
          -0.16212051957228327f,
162
59.4M
          -0.47067022585725277f,
163
59.4M
          -0.1464291867126764f,
164
59.4M
          0.3085497062849487f,
165
59.4M
          0.0,
166
59.4M
          -0.14642918671266536f,
167
59.4M
          0.4251149611657548f,
168
59.4M
      },
169
59.4M
      {
170
59.4M
          0.0,
171
59.4M
          -0.7071067811865474f,
172
59.4M
          0.0,
173
59.4M
          0.0,
174
59.4M
          0.7071067811865476f,
175
59.4M
          0.0,
176
59.4M
          0.0,
177
59.4M
          0.0,
178
59.4M
          0.0,
179
59.4M
          0.0,
180
59.4M
          0.0,
181
59.4M
          0.0,
182
59.4M
          0.0,
183
59.4M
          0.0,
184
59.4M
          0.0,
185
59.4M
          0.0,
186
59.4M
      },
187
59.4M
      {
188
59.4M
          -0.4105377591765233f,
189
59.4M
          0.6235485373547691f,
190
59.4M
          -0.06435071657946274f,
191
59.4M
          -0.06435071657946266f,
192
59.4M
          0.6235485373547694f,
193
59.4M
          -0.06435071657946284f,
194
59.4M
          -0.0643507165794628f,
195
59.4M
          -0.06435071657946274f,
196
59.4M
          -0.06435071657946272f,
197
59.4M
          -0.06435071657946279f,
198
59.4M
          -0.06435071657946266f,
199
59.4M
          -0.06435071657946277f,
200
59.4M
          -0.06435071657946277f,
201
59.4M
          -0.06435071657946273f,
202
59.4M
          -0.06435071657946274f,
203
59.4M
          -0.0643507165794626f,
204
59.4M
      },
205
59.4M
      {
206
59.4M
          0.0,
207
59.4M
          0.0,
208
59.4M
          -0.4517556589999482f,
209
59.4M
          0.15854503551840063f,
210
59.4M
          0.0,
211
59.4M
          -0.04038515160822202f,
212
59.4M
          0.0074182263792423875f,
213
59.4M
          0.39351034269210167f,
214
59.4M
          -0.45175565899994635f,
215
59.4M
          0.007418226379244351f,
216
59.4M
          0.1107416575309343f,
217
59.4M
          0.08298163094882051f,
218
59.4M
          0.15854503551839705f,
219
59.4M
          0.3935103426921022f,
220
59.4M
          0.0829816309488214f,
221
59.4M
          -0.45175565899994796f,
222
59.4M
      },
223
59.4M
      {
224
59.4M
          0.0,
225
59.4M
          0.0,
226
59.4M
          -0.304684750724869f,
227
59.4M
          0.5112616136591823f,
228
59.4M
          0.0,
229
59.4M
          0.0,
230
59.4M
          -0.290480129728998f,
231
59.4M
          -0.06578701549142804f,
232
59.4M
          0.304684750724884f,
233
59.4M
          0.2904801297290076f,
234
59.4M
          0.0,
235
59.4M
          -0.23889773523344604f,
236
59.4M
          -0.5112616136592012f,
237
59.4M
          0.06578701549142545f,
238
59.4M
          0.23889773523345467f,
239
59.4M
          0.0,
240
59.4M
      },
241
59.4M
      {
242
59.4M
          0.0,
243
59.4M
          0.0,
244
59.4M
          0.3017929516615495f,
245
59.4M
          0.25792362796341184f,
246
59.4M
          0.0,
247
59.4M
          0.16272340142866204f,
248
59.4M
          0.09520022653475037f,
249
59.4M
          0.0,
250
59.4M
          0.3017929516615503f,
251
59.4M
          0.09520022653475055f,
252
59.4M
          -0.16272340142866173f,
253
59.4M
          -0.35312385449816297f,
254
59.4M
          0.25792362796341295f,
255
59.4M
          0.0,
256
59.4M
          -0.3531238544981624f,
257
59.4M
          -0.6035859033230976f,
258
59.4M
      },
259
59.4M
      {
260
59.4M
          0.0,
261
59.4M
          0.0,
262
59.4M
          0.40824829046386274f,
263
59.4M
          0.0,
264
59.4M
          0.0,
265
59.4M
          0.0,
266
59.4M
          0.0,
267
59.4M
          -0.4082482904638628f,
268
59.4M
          -0.4082482904638635f,
269
59.4M
          0.0,
270
59.4M
          0.0,
271
59.4M
          -0.40824829046386296f,
272
59.4M
          0.0,
273
59.4M
          0.4082482904638634f,
274
59.4M
          0.408248290463863f,
275
59.4M
          0.0,
276
59.4M
      },
277
59.4M
      {
278
59.4M
          0.0,
279
59.4M
          0.0,
280
59.4M
          0.1747866975480809f,
281
59.4M
          0.0812611176717539f,
282
59.4M
          0.0,
283
59.4M
          0.0,
284
59.4M
          -0.3675398009862027f,
285
59.4M
          -0.307882213957909f,
286
59.4M
          -0.17478669754808135f,
287
59.4M
          0.3675398009862011f,
288
59.4M
          0.0,
289
59.4M
          0.4826689115059883f,
290
59.4M
          -0.08126111767175039f,
291
59.4M
          0.30788221395790305f,
292
59.4M
          -0.48266891150598584f,
293
59.4M
          0.0,
294
59.4M
      },
295
59.4M
      {
296
59.4M
          0.0,
297
59.4M
          0.0,
298
59.4M
          -0.21105601049335784f,
299
59.4M
          0.18567180916109802f,
300
59.4M
          0.0,
301
59.4M
          0.0,
302
59.4M
          0.49215859013738733f,
303
59.4M
          -0.38525013709251915f,
304
59.4M
          0.21105601049335806f,
305
59.4M
          -0.49215859013738905f,
306
59.4M
          0.0,
307
59.4M
          0.17419412659916217f,
308
59.4M
          -0.18567180916109904f,
309
59.4M
          0.3852501370925211f,
310
59.4M
          -0.1741941265991621f,
311
59.4M
          0.0,
312
59.4M
      },
313
59.4M
      {
314
59.4M
          0.0,
315
59.4M
          0.0,
316
59.4M
          -0.14266084808807264f,
317
59.4M
          -0.3416446842253372f,
318
59.4M
          0.0,
319
59.4M
          0.7367497537172237f,
320
59.4M
          0.24627107722075148f,
321
59.4M
          -0.08574019035519306f,
322
59.4M
          -0.14266084808807344f,
323
59.4M
          0.24627107722075137f,
324
59.4M
          0.14883399227113567f,
325
59.4M
          -0.04768680350229251f,
326
59.4M
          -0.3416446842253373f,
327
59.4M
          -0.08574019035519267f,
328
59.4M
          -0.047686803502292804f,
329
59.4M
          -0.14266084808807242f,
330
59.4M
      },
331
59.4M
      {
332
59.4M
          0.0,
333
59.4M
          0.0,
334
59.4M
          -0.13813540350758585f,
335
59.4M
          0.3302282550303788f,
336
59.4M
          0.0,
337
59.4M
          0.08755115000587084f,
338
59.4M
          -0.07946706605909573f,
339
59.4M
          -0.4613374887461511f,
340
59.4M
          -0.13813540350758294f,
341
59.4M
          -0.07946706605910261f,
342
59.4M
          0.49724647109535086f,
343
59.4M
          0.12538059448563663f,
344
59.4M
          0.3302282550303805f,
345
59.4M
          -0.4613374887461554f,
346
59.4M
          0.12538059448564315f,
347
59.4M
          -0.13813540350758452f,
348
59.4M
      },
349
59.4M
      {
350
59.4M
          0.0,
351
59.4M
          0.0,
352
59.4M
          -0.17437602599651067f,
353
59.4M
          0.0702790691196284f,
354
59.4M
          0.0,
355
59.4M
          -0.2921026642334881f,
356
59.4M
          0.3623817333531167f,
357
59.4M
          0.0,
358
59.4M
          -0.1743760259965108f,
359
59.4M
          0.36238173335311646f,
360
59.4M
          0.29210266423348785f,
361
59.4M
          -0.4326608024727445f,
362
59.4M
          0.07027906911962818f,
363
59.4M
          0.0,
364
59.4M
          -0.4326608024727457f,
365
59.4M
          0.34875205199302267f,
366
59.4M
      },
367
59.4M
      {
368
59.4M
          0.0,
369
59.4M
          0.0,
370
59.4M
          0.11354987314994337f,
371
59.4M
          -0.07417504595810355f,
372
59.4M
          0.0,
373
59.4M
          0.19402893032594343f,
374
59.4M
          -0.435190496523228f,
375
59.4M
          0.21918684838857466f,
376
59.4M
          0.11354987314994257f,
377
59.4M
          -0.4351904965232251f,
378
59.4M
          0.5550443808910661f,
379
59.4M
          -0.25468277124066463f,
380
59.4M
          -0.07417504595810233f,
381
59.4M
          0.2191868483885728f,
382
59.4M
          -0.25468277124066413f,
383
59.4M
          0.1135498731499429f,
384
59.4M
      },
385
59.4M
  };
386
387
59.4M
  const HWY_CAPPED(float, 16) d;
388
178M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
118M
    auto pixel = Zero(d);
390
2.02G
    for (size_t j = 0; j < 16; j++) {
391
1.90G
      auto cf = Set(d, coeffs[j]);
392
1.90G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.90G
      pixel = MulAdd(cf, basis, pixel);
394
1.90G
    }
395
118M
    Store(pixel, d, pixels + i);
396
118M
  }
397
59.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
58.4M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
58.4M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
58.4M
      {
98
58.4M
          0.25,
99
58.4M
          0.25,
100
58.4M
          0.25,
101
58.4M
          0.25,
102
58.4M
          0.25,
103
58.4M
          0.25,
104
58.4M
          0.25,
105
58.4M
          0.25,
106
58.4M
          0.25,
107
58.4M
          0.25,
108
58.4M
          0.25,
109
58.4M
          0.25,
110
58.4M
          0.25,
111
58.4M
          0.25,
112
58.4M
          0.25,
113
58.4M
          0.25,
114
58.4M
      },
115
58.4M
      {
116
58.4M
          0.876902929799142f,
117
58.4M
          0.2206518106944235f,
118
58.4M
          -0.10140050393753763f,
119
58.4M
          -0.1014005039375375f,
120
58.4M
          0.2206518106944236f,
121
58.4M
          -0.10140050393753777f,
122
58.4M
          -0.10140050393753772f,
123
58.4M
          -0.10140050393753763f,
124
58.4M
          -0.10140050393753758f,
125
58.4M
          -0.10140050393753769f,
126
58.4M
          -0.1014005039375375f,
127
58.4M
          -0.10140050393753768f,
128
58.4M
          -0.10140050393753768f,
129
58.4M
          -0.10140050393753759f,
130
58.4M
          -0.10140050393753763f,
131
58.4M
          -0.10140050393753741f,
132
58.4M
      },
133
58.4M
      {
134
58.4M
          0.0,
135
58.4M
          0.0,
136
58.4M
          0.40670075830260755f,
137
58.4M
          0.44444816619734445f,
138
58.4M
          0.0,
139
58.4M
          0.0,
140
58.4M
          0.19574399372042936f,
141
58.4M
          0.2929100136981264f,
142
58.4M
          -0.40670075830260716f,
143
58.4M
          -0.19574399372042872f,
144
58.4M
          0.0,
145
58.4M
          0.11379074460448091f,
146
58.4M
          -0.44444816619734384f,
147
58.4M
          -0.29291001369812636f,
148
58.4M
          -0.1137907446044814f,
149
58.4M
          0.0,
150
58.4M
      },
151
58.4M
      {
152
58.4M
          0.0,
153
58.4M
          0.0,
154
58.4M
          -0.21255748058288748f,
155
58.4M
          0.3085497062849767f,
156
58.4M
          0.0,
157
58.4M
          0.4706702258572536f,
158
58.4M
          -0.1621205195722993f,
159
58.4M
          0.0,
160
58.4M
          -0.21255748058287047f,
161
58.4M
          -0.16212051957228327f,
162
58.4M
          -0.47067022585725277f,
163
58.4M
          -0.1464291867126764f,
164
58.4M
          0.3085497062849487f,
165
58.4M
          0.0,
166
58.4M
          -0.14642918671266536f,
167
58.4M
          0.4251149611657548f,
168
58.4M
      },
169
58.4M
      {
170
58.4M
          0.0,
171
58.4M
          -0.7071067811865474f,
172
58.4M
          0.0,
173
58.4M
          0.0,
174
58.4M
          0.7071067811865476f,
175
58.4M
          0.0,
176
58.4M
          0.0,
177
58.4M
          0.0,
178
58.4M
          0.0,
179
58.4M
          0.0,
180
58.4M
          0.0,
181
58.4M
          0.0,
182
58.4M
          0.0,
183
58.4M
          0.0,
184
58.4M
          0.0,
185
58.4M
          0.0,
186
58.4M
      },
187
58.4M
      {
188
58.4M
          -0.4105377591765233f,
189
58.4M
          0.6235485373547691f,
190
58.4M
          -0.06435071657946274f,
191
58.4M
          -0.06435071657946266f,
192
58.4M
          0.6235485373547694f,
193
58.4M
          -0.06435071657946284f,
194
58.4M
          -0.0643507165794628f,
195
58.4M
          -0.06435071657946274f,
196
58.4M
          -0.06435071657946272f,
197
58.4M
          -0.06435071657946279f,
198
58.4M
          -0.06435071657946266f,
199
58.4M
          -0.06435071657946277f,
200
58.4M
          -0.06435071657946277f,
201
58.4M
          -0.06435071657946273f,
202
58.4M
          -0.06435071657946274f,
203
58.4M
          -0.0643507165794626f,
204
58.4M
      },
205
58.4M
      {
206
58.4M
          0.0,
207
58.4M
          0.0,
208
58.4M
          -0.4517556589999482f,
209
58.4M
          0.15854503551840063f,
210
58.4M
          0.0,
211
58.4M
          -0.04038515160822202f,
212
58.4M
          0.0074182263792423875f,
213
58.4M
          0.39351034269210167f,
214
58.4M
          -0.45175565899994635f,
215
58.4M
          0.007418226379244351f,
216
58.4M
          0.1107416575309343f,
217
58.4M
          0.08298163094882051f,
218
58.4M
          0.15854503551839705f,
219
58.4M
          0.3935103426921022f,
220
58.4M
          0.0829816309488214f,
221
58.4M
          -0.45175565899994796f,
222
58.4M
      },
223
58.4M
      {
224
58.4M
          0.0,
225
58.4M
          0.0,
226
58.4M
          -0.304684750724869f,
227
58.4M
          0.5112616136591823f,
228
58.4M
          0.0,
229
58.4M
          0.0,
230
58.4M
          -0.290480129728998f,
231
58.4M
          -0.06578701549142804f,
232
58.4M
          0.304684750724884f,
233
58.4M
          0.2904801297290076f,
234
58.4M
          0.0,
235
58.4M
          -0.23889773523344604f,
236
58.4M
          -0.5112616136592012f,
237
58.4M
          0.06578701549142545f,
238
58.4M
          0.23889773523345467f,
239
58.4M
          0.0,
240
58.4M
      },
241
58.4M
      {
242
58.4M
          0.0,
243
58.4M
          0.0,
244
58.4M
          0.3017929516615495f,
245
58.4M
          0.25792362796341184f,
246
58.4M
          0.0,
247
58.4M
          0.16272340142866204f,
248
58.4M
          0.09520022653475037f,
249
58.4M
          0.0,
250
58.4M
          0.3017929516615503f,
251
58.4M
          0.09520022653475055f,
252
58.4M
          -0.16272340142866173f,
253
58.4M
          -0.35312385449816297f,
254
58.4M
          0.25792362796341295f,
255
58.4M
          0.0,
256
58.4M
          -0.3531238544981624f,
257
58.4M
          -0.6035859033230976f,
258
58.4M
      },
259
58.4M
      {
260
58.4M
          0.0,
261
58.4M
          0.0,
262
58.4M
          0.40824829046386274f,
263
58.4M
          0.0,
264
58.4M
          0.0,
265
58.4M
          0.0,
266
58.4M
          0.0,
267
58.4M
          -0.4082482904638628f,
268
58.4M
          -0.4082482904638635f,
269
58.4M
          0.0,
270
58.4M
          0.0,
271
58.4M
          -0.40824829046386296f,
272
58.4M
          0.0,
273
58.4M
          0.4082482904638634f,
274
58.4M
          0.408248290463863f,
275
58.4M
          0.0,
276
58.4M
      },
277
58.4M
      {
278
58.4M
          0.0,
279
58.4M
          0.0,
280
58.4M
          0.1747866975480809f,
281
58.4M
          0.0812611176717539f,
282
58.4M
          0.0,
283
58.4M
          0.0,
284
58.4M
          -0.3675398009862027f,
285
58.4M
          -0.307882213957909f,
286
58.4M
          -0.17478669754808135f,
287
58.4M
          0.3675398009862011f,
288
58.4M
          0.0,
289
58.4M
          0.4826689115059883f,
290
58.4M
          -0.08126111767175039f,
291
58.4M
          0.30788221395790305f,
292
58.4M
          -0.48266891150598584f,
293
58.4M
          0.0,
294
58.4M
      },
295
58.4M
      {
296
58.4M
          0.0,
297
58.4M
          0.0,
298
58.4M
          -0.21105601049335784f,
299
58.4M
          0.18567180916109802f,
300
58.4M
          0.0,
301
58.4M
          0.0,
302
58.4M
          0.49215859013738733f,
303
58.4M
          -0.38525013709251915f,
304
58.4M
          0.21105601049335806f,
305
58.4M
          -0.49215859013738905f,
306
58.4M
          0.0,
307
58.4M
          0.17419412659916217f,
308
58.4M
          -0.18567180916109904f,
309
58.4M
          0.3852501370925211f,
310
58.4M
          -0.1741941265991621f,
311
58.4M
          0.0,
312
58.4M
      },
313
58.4M
      {
314
58.4M
          0.0,
315
58.4M
          0.0,
316
58.4M
          -0.14266084808807264f,
317
58.4M
          -0.3416446842253372f,
318
58.4M
          0.0,
319
58.4M
          0.7367497537172237f,
320
58.4M
          0.24627107722075148f,
321
58.4M
          -0.08574019035519306f,
322
58.4M
          -0.14266084808807344f,
323
58.4M
          0.24627107722075137f,
324
58.4M
          0.14883399227113567f,
325
58.4M
          -0.04768680350229251f,
326
58.4M
          -0.3416446842253373f,
327
58.4M
          -0.08574019035519267f,
328
58.4M
          -0.047686803502292804f,
329
58.4M
          -0.14266084808807242f,
330
58.4M
      },
331
58.4M
      {
332
58.4M
          0.0,
333
58.4M
          0.0,
334
58.4M
          -0.13813540350758585f,
335
58.4M
          0.3302282550303788f,
336
58.4M
          0.0,
337
58.4M
          0.08755115000587084f,
338
58.4M
          -0.07946706605909573f,
339
58.4M
          -0.4613374887461511f,
340
58.4M
          -0.13813540350758294f,
341
58.4M
          -0.07946706605910261f,
342
58.4M
          0.49724647109535086f,
343
58.4M
          0.12538059448563663f,
344
58.4M
          0.3302282550303805f,
345
58.4M
          -0.4613374887461554f,
346
58.4M
          0.12538059448564315f,
347
58.4M
          -0.13813540350758452f,
348
58.4M
      },
349
58.4M
      {
350
58.4M
          0.0,
351
58.4M
          0.0,
352
58.4M
          -0.17437602599651067f,
353
58.4M
          0.0702790691196284f,
354
58.4M
          0.0,
355
58.4M
          -0.2921026642334881f,
356
58.4M
          0.3623817333531167f,
357
58.4M
          0.0,
358
58.4M
          -0.1743760259965108f,
359
58.4M
          0.36238173335311646f,
360
58.4M
          0.29210266423348785f,
361
58.4M
          -0.4326608024727445f,
362
58.4M
          0.07027906911962818f,
363
58.4M
          0.0,
364
58.4M
          -0.4326608024727457f,
365
58.4M
          0.34875205199302267f,
366
58.4M
      },
367
58.4M
      {
368
58.4M
          0.0,
369
58.4M
          0.0,
370
58.4M
          0.11354987314994337f,
371
58.4M
          -0.07417504595810355f,
372
58.4M
          0.0,
373
58.4M
          0.19402893032594343f,
374
58.4M
          -0.435190496523228f,
375
58.4M
          0.21918684838857466f,
376
58.4M
          0.11354987314994257f,
377
58.4M
          -0.4351904965232251f,
378
58.4M
          0.5550443808910661f,
379
58.4M
          -0.25468277124066463f,
380
58.4M
          -0.07417504595810233f,
381
58.4M
          0.2191868483885728f,
382
58.4M
          -0.25468277124066413f,
383
58.4M
          0.1135498731499429f,
384
58.4M
      },
385
58.4M
  };
386
387
58.4M
  const HWY_CAPPED(float, 16) d;
388
175M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
116M
    auto pixel = Zero(d);
390
1.98G
    for (size_t j = 0; j < 16; j++) {
391
1.86G
      auto cf = Set(d, coeffs[j]);
392
1.86G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.86G
      pixel = MulAdd(cf, basis, pixel);
394
1.86G
    }
395
116M
    Store(pixel, d, pixels + i);
396
116M
  }
397
58.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
985k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
985k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
985k
      {
98
985k
          0.25,
99
985k
          0.25,
100
985k
          0.25,
101
985k
          0.25,
102
985k
          0.25,
103
985k
          0.25,
104
985k
          0.25,
105
985k
          0.25,
106
985k
          0.25,
107
985k
          0.25,
108
985k
          0.25,
109
985k
          0.25,
110
985k
          0.25,
111
985k
          0.25,
112
985k
          0.25,
113
985k
          0.25,
114
985k
      },
115
985k
      {
116
985k
          0.876902929799142f,
117
985k
          0.2206518106944235f,
118
985k
          -0.10140050393753763f,
119
985k
          -0.1014005039375375f,
120
985k
          0.2206518106944236f,
121
985k
          -0.10140050393753777f,
122
985k
          -0.10140050393753772f,
123
985k
          -0.10140050393753763f,
124
985k
          -0.10140050393753758f,
125
985k
          -0.10140050393753769f,
126
985k
          -0.1014005039375375f,
127
985k
          -0.10140050393753768f,
128
985k
          -0.10140050393753768f,
129
985k
          -0.10140050393753759f,
130
985k
          -0.10140050393753763f,
131
985k
          -0.10140050393753741f,
132
985k
      },
133
985k
      {
134
985k
          0.0,
135
985k
          0.0,
136
985k
          0.40670075830260755f,
137
985k
          0.44444816619734445f,
138
985k
          0.0,
139
985k
          0.0,
140
985k
          0.19574399372042936f,
141
985k
          0.2929100136981264f,
142
985k
          -0.40670075830260716f,
143
985k
          -0.19574399372042872f,
144
985k
          0.0,
145
985k
          0.11379074460448091f,
146
985k
          -0.44444816619734384f,
147
985k
          -0.29291001369812636f,
148
985k
          -0.1137907446044814f,
149
985k
          0.0,
150
985k
      },
151
985k
      {
152
985k
          0.0,
153
985k
          0.0,
154
985k
          -0.21255748058288748f,
155
985k
          0.3085497062849767f,
156
985k
          0.0,
157
985k
          0.4706702258572536f,
158
985k
          -0.1621205195722993f,
159
985k
          0.0,
160
985k
          -0.21255748058287047f,
161
985k
          -0.16212051957228327f,
162
985k
          -0.47067022585725277f,
163
985k
          -0.1464291867126764f,
164
985k
          0.3085497062849487f,
165
985k
          0.0,
166
985k
          -0.14642918671266536f,
167
985k
          0.4251149611657548f,
168
985k
      },
169
985k
      {
170
985k
          0.0,
171
985k
          -0.7071067811865474f,
172
985k
          0.0,
173
985k
          0.0,
174
985k
          0.7071067811865476f,
175
985k
          0.0,
176
985k
          0.0,
177
985k
          0.0,
178
985k
          0.0,
179
985k
          0.0,
180
985k
          0.0,
181
985k
          0.0,
182
985k
          0.0,
183
985k
          0.0,
184
985k
          0.0,
185
985k
          0.0,
186
985k
      },
187
985k
      {
188
985k
          -0.4105377591765233f,
189
985k
          0.6235485373547691f,
190
985k
          -0.06435071657946274f,
191
985k
          -0.06435071657946266f,
192
985k
          0.6235485373547694f,
193
985k
          -0.06435071657946284f,
194
985k
          -0.0643507165794628f,
195
985k
          -0.06435071657946274f,
196
985k
          -0.06435071657946272f,
197
985k
          -0.06435071657946279f,
198
985k
          -0.06435071657946266f,
199
985k
          -0.06435071657946277f,
200
985k
          -0.06435071657946277f,
201
985k
          -0.06435071657946273f,
202
985k
          -0.06435071657946274f,
203
985k
          -0.0643507165794626f,
204
985k
      },
205
985k
      {
206
985k
          0.0,
207
985k
          0.0,
208
985k
          -0.4517556589999482f,
209
985k
          0.15854503551840063f,
210
985k
          0.0,
211
985k
          -0.04038515160822202f,
212
985k
          0.0074182263792423875f,
213
985k
          0.39351034269210167f,
214
985k
          -0.45175565899994635f,
215
985k
          0.007418226379244351f,
216
985k
          0.1107416575309343f,
217
985k
          0.08298163094882051f,
218
985k
          0.15854503551839705f,
219
985k
          0.3935103426921022f,
220
985k
          0.0829816309488214f,
221
985k
          -0.45175565899994796f,
222
985k
      },
223
985k
      {
224
985k
          0.0,
225
985k
          0.0,
226
985k
          -0.304684750724869f,
227
985k
          0.5112616136591823f,
228
985k
          0.0,
229
985k
          0.0,
230
985k
          -0.290480129728998f,
231
985k
          -0.06578701549142804f,
232
985k
          0.304684750724884f,
233
985k
          0.2904801297290076f,
234
985k
          0.0,
235
985k
          -0.23889773523344604f,
236
985k
          -0.5112616136592012f,
237
985k
          0.06578701549142545f,
238
985k
          0.23889773523345467f,
239
985k
          0.0,
240
985k
      },
241
985k
      {
242
985k
          0.0,
243
985k
          0.0,
244
985k
          0.3017929516615495f,
245
985k
          0.25792362796341184f,
246
985k
          0.0,
247
985k
          0.16272340142866204f,
248
985k
          0.09520022653475037f,
249
985k
          0.0,
250
985k
          0.3017929516615503f,
251
985k
          0.09520022653475055f,
252
985k
          -0.16272340142866173f,
253
985k
          -0.35312385449816297f,
254
985k
          0.25792362796341295f,
255
985k
          0.0,
256
985k
          -0.3531238544981624f,
257
985k
          -0.6035859033230976f,
258
985k
      },
259
985k
      {
260
985k
          0.0,
261
985k
          0.0,
262
985k
          0.40824829046386274f,
263
985k
          0.0,
264
985k
          0.0,
265
985k
          0.0,
266
985k
          0.0,
267
985k
          -0.4082482904638628f,
268
985k
          -0.4082482904638635f,
269
985k
          0.0,
270
985k
          0.0,
271
985k
          -0.40824829046386296f,
272
985k
          0.0,
273
985k
          0.4082482904638634f,
274
985k
          0.408248290463863f,
275
985k
          0.0,
276
985k
      },
277
985k
      {
278
985k
          0.0,
279
985k
          0.0,
280
985k
          0.1747866975480809f,
281
985k
          0.0812611176717539f,
282
985k
          0.0,
283
985k
          0.0,
284
985k
          -0.3675398009862027f,
285
985k
          -0.307882213957909f,
286
985k
          -0.17478669754808135f,
287
985k
          0.3675398009862011f,
288
985k
          0.0,
289
985k
          0.4826689115059883f,
290
985k
          -0.08126111767175039f,
291
985k
          0.30788221395790305f,
292
985k
          -0.48266891150598584f,
293
985k
          0.0,
294
985k
      },
295
985k
      {
296
985k
          0.0,
297
985k
          0.0,
298
985k
          -0.21105601049335784f,
299
985k
          0.18567180916109802f,
300
985k
          0.0,
301
985k
          0.0,
302
985k
          0.49215859013738733f,
303
985k
          -0.38525013709251915f,
304
985k
          0.21105601049335806f,
305
985k
          -0.49215859013738905f,
306
985k
          0.0,
307
985k
          0.17419412659916217f,
308
985k
          -0.18567180916109904f,
309
985k
          0.3852501370925211f,
310
985k
          -0.1741941265991621f,
311
985k
          0.0,
312
985k
      },
313
985k
      {
314
985k
          0.0,
315
985k
          0.0,
316
985k
          -0.14266084808807264f,
317
985k
          -0.3416446842253372f,
318
985k
          0.0,
319
985k
          0.7367497537172237f,
320
985k
          0.24627107722075148f,
321
985k
          -0.08574019035519306f,
322
985k
          -0.14266084808807344f,
323
985k
          0.24627107722075137f,
324
985k
          0.14883399227113567f,
325
985k
          -0.04768680350229251f,
326
985k
          -0.3416446842253373f,
327
985k
          -0.08574019035519267f,
328
985k
          -0.047686803502292804f,
329
985k
          -0.14266084808807242f,
330
985k
      },
331
985k
      {
332
985k
          0.0,
333
985k
          0.0,
334
985k
          -0.13813540350758585f,
335
985k
          0.3302282550303788f,
336
985k
          0.0,
337
985k
          0.08755115000587084f,
338
985k
          -0.07946706605909573f,
339
985k
          -0.4613374887461511f,
340
985k
          -0.13813540350758294f,
341
985k
          -0.07946706605910261f,
342
985k
          0.49724647109535086f,
343
985k
          0.12538059448563663f,
344
985k
          0.3302282550303805f,
345
985k
          -0.4613374887461554f,
346
985k
          0.12538059448564315f,
347
985k
          -0.13813540350758452f,
348
985k
      },
349
985k
      {
350
985k
          0.0,
351
985k
          0.0,
352
985k
          -0.17437602599651067f,
353
985k
          0.0702790691196284f,
354
985k
          0.0,
355
985k
          -0.2921026642334881f,
356
985k
          0.3623817333531167f,
357
985k
          0.0,
358
985k
          -0.1743760259965108f,
359
985k
          0.36238173335311646f,
360
985k
          0.29210266423348785f,
361
985k
          -0.4326608024727445f,
362
985k
          0.07027906911962818f,
363
985k
          0.0,
364
985k
          -0.4326608024727457f,
365
985k
          0.34875205199302267f,
366
985k
      },
367
985k
      {
368
985k
          0.0,
369
985k
          0.0,
370
985k
          0.11354987314994337f,
371
985k
          -0.07417504595810355f,
372
985k
          0.0,
373
985k
          0.19402893032594343f,
374
985k
          -0.435190496523228f,
375
985k
          0.21918684838857466f,
376
985k
          0.11354987314994257f,
377
985k
          -0.4351904965232251f,
378
985k
          0.5550443808910661f,
379
985k
          -0.25468277124066463f,
380
985k
          -0.07417504595810233f,
381
985k
          0.2191868483885728f,
382
985k
          -0.25468277124066413f,
383
985k
          0.1135498731499429f,
384
985k
      },
385
985k
  };
386
387
985k
  const HWY_CAPPED(float, 16) d;
388
2.95M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.97M
    auto pixel = Zero(d);
390
33.5M
    for (size_t j = 0; j < 16; j++) {
391
31.5M
      auto cf = Set(d, coeffs[j]);
392
31.5M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
31.5M
      pixel = MulAdd(cf, basis, pixel);
394
31.5M
    }
395
1.97M
    Store(pixel, d, pixels + i);
396
1.97M
  }
397
985k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
59.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
59.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
59.4M
  size_t afv_x = afv_kind & 1;
404
59.4M
  size_t afv_y = afv_kind / 2;
405
59.4M
  float dcs[3] = {};
406
59.4M
  float block00 = coefficients[0];
407
59.4M
  float block01 = coefficients[1];
408
59.4M
  float block10 = coefficients[8];
409
59.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
59.4M
  dcs[1] = (block00 + block10 - block01);
411
59.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
59.4M
  HWY_ALIGN float coeff[4 * 4];
414
59.4M
  coeff[0] = dcs[0];
415
297M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.18G
    for (size_t ix = 0; ix < 4; ix++) {
417
950M
      if (ix == 0 && iy == 0) continue;
418
891M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
891M
    }
420
237M
  }
421
59.4M
  HWY_ALIGN float block[4 * 8];
422
59.4M
  AFVIDCT4x4(coeff, block);
423
297M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.18G
    for (size_t ix = 0; ix < 4; ix++) {
425
950M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
950M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
950M
    }
428
237M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
59.4M
  block[0] = dcs[1];
431
297M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.18G
    for (size_t ix = 0; ix < 4; ix++) {
433
950M
      if (ix == 0 && iy == 0) continue;
434
891M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
891M
    }
436
237M
  }
437
59.4M
  ComputeScaledIDCT<4, 4>()(
438
59.4M
      block,
439
59.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
59.4M
            pixels_stride),
441
59.4M
      scratch_space);
442
  // IDCT4x8.
443
59.4M
  block[0] = dcs[2];
444
297M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.13G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.90G
      if (ix == 0 && iy == 0) continue;
447
1.84G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.84G
    }
449
237M
  }
450
59.4M
  ComputeScaledIDCT<4, 8>()(
451
59.4M
      block,
452
59.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
59.4M
      scratch_space);
454
59.4M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
14.6M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
14.6M
  size_t afv_x = afv_kind & 1;
404
14.6M
  size_t afv_y = afv_kind / 2;
405
14.6M
  float dcs[3] = {};
406
14.6M
  float block00 = coefficients[0];
407
14.6M
  float block01 = coefficients[1];
408
14.6M
  float block10 = coefficients[8];
409
14.6M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
14.6M
  dcs[1] = (block00 + block10 - block01);
411
14.6M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
14.6M
  HWY_ALIGN float coeff[4 * 4];
414
14.6M
  coeff[0] = dcs[0];
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      if (ix == 0 && iy == 0) continue;
418
219M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
219M
    }
420
58.4M
  }
421
14.6M
  HWY_ALIGN float block[4 * 8];
422
14.6M
  AFVIDCT4x4(coeff, block);
423
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
292M
    for (size_t ix = 0; ix < 4; ix++) {
425
233M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
233M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
233M
    }
428
58.4M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
14.6M
  block[0] = dcs[1];
431
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
292M
    for (size_t ix = 0; ix < 4; ix++) {
433
233M
      if (ix == 0 && iy == 0) continue;
434
219M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
219M
    }
436
58.4M
  }
437
14.6M
  ComputeScaledIDCT<4, 4>()(
438
14.6M
      block,
439
14.6M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
14.6M
            pixels_stride),
441
14.6M
      scratch_space);
442
  // IDCT4x8.
443
14.6M
  block[0] = dcs[2];
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      if (ix == 0 && iy == 0) continue;
447
452M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
452M
    }
449
58.4M
  }
450
14.6M
  ComputeScaledIDCT<4, 8>()(
451
14.6M
      block,
452
14.6M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
14.6M
      scratch_space);
454
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
14.6M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
14.6M
  size_t afv_x = afv_kind & 1;
404
14.6M
  size_t afv_y = afv_kind / 2;
405
14.6M
  float dcs[3] = {};
406
14.6M
  float block00 = coefficients[0];
407
14.6M
  float block01 = coefficients[1];
408
14.6M
  float block10 = coefficients[8];
409
14.6M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
14.6M
  dcs[1] = (block00 + block10 - block01);
411
14.6M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
14.6M
  HWY_ALIGN float coeff[4 * 4];
414
14.6M
  coeff[0] = dcs[0];
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      if (ix == 0 && iy == 0) continue;
418
219M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
219M
    }
420
58.4M
  }
421
14.6M
  HWY_ALIGN float block[4 * 8];
422
14.6M
  AFVIDCT4x4(coeff, block);
423
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
292M
    for (size_t ix = 0; ix < 4; ix++) {
425
233M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
233M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
233M
    }
428
58.4M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
14.6M
  block[0] = dcs[1];
431
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
292M
    for (size_t ix = 0; ix < 4; ix++) {
433
233M
      if (ix == 0 && iy == 0) continue;
434
219M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
219M
    }
436
58.4M
  }
437
14.6M
  ComputeScaledIDCT<4, 4>()(
438
14.6M
      block,
439
14.6M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
14.6M
            pixels_stride),
441
14.6M
      scratch_space);
442
  // IDCT4x8.
443
14.6M
  block[0] = dcs[2];
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      if (ix == 0 && iy == 0) continue;
447
452M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
452M
    }
449
58.4M
  }
450
14.6M
  ComputeScaledIDCT<4, 8>()(
451
14.6M
      block,
452
14.6M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
14.6M
      scratch_space);
454
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
14.6M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
14.6M
  size_t afv_x = afv_kind & 1;
404
14.6M
  size_t afv_y = afv_kind / 2;
405
14.6M
  float dcs[3] = {};
406
14.6M
  float block00 = coefficients[0];
407
14.6M
  float block01 = coefficients[1];
408
14.6M
  float block10 = coefficients[8];
409
14.6M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
14.6M
  dcs[1] = (block00 + block10 - block01);
411
14.6M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
14.6M
  HWY_ALIGN float coeff[4 * 4];
414
14.6M
  coeff[0] = dcs[0];
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      if (ix == 0 && iy == 0) continue;
418
219M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
219M
    }
420
58.4M
  }
421
14.6M
  HWY_ALIGN float block[4 * 8];
422
14.6M
  AFVIDCT4x4(coeff, block);
423
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
292M
    for (size_t ix = 0; ix < 4; ix++) {
425
233M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
233M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
233M
    }
428
58.4M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
14.6M
  block[0] = dcs[1];
431
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
292M
    for (size_t ix = 0; ix < 4; ix++) {
433
233M
      if (ix == 0 && iy == 0) continue;
434
219M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
219M
    }
436
58.4M
  }
437
14.6M
  ComputeScaledIDCT<4, 4>()(
438
14.6M
      block,
439
14.6M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
14.6M
            pixels_stride),
441
14.6M
      scratch_space);
442
  // IDCT4x8.
443
14.6M
  block[0] = dcs[2];
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      if (ix == 0 && iy == 0) continue;
447
452M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
452M
    }
449
58.4M
  }
450
14.6M
  ComputeScaledIDCT<4, 8>()(
451
14.6M
      block,
452
14.6M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
14.6M
      scratch_space);
454
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
14.6M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
14.6M
  size_t afv_x = afv_kind & 1;
404
14.6M
  size_t afv_y = afv_kind / 2;
405
14.6M
  float dcs[3] = {};
406
14.6M
  float block00 = coefficients[0];
407
14.6M
  float block01 = coefficients[1];
408
14.6M
  float block10 = coefficients[8];
409
14.6M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
14.6M
  dcs[1] = (block00 + block10 - block01);
411
14.6M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
14.6M
  HWY_ALIGN float coeff[4 * 4];
414
14.6M
  coeff[0] = dcs[0];
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      if (ix == 0 && iy == 0) continue;
418
219M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
219M
    }
420
58.4M
  }
421
14.6M
  HWY_ALIGN float block[4 * 8];
422
14.6M
  AFVIDCT4x4(coeff, block);
423
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
292M
    for (size_t ix = 0; ix < 4; ix++) {
425
233M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
233M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
233M
    }
428
58.4M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
14.6M
  block[0] = dcs[1];
431
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
292M
    for (size_t ix = 0; ix < 4; ix++) {
433
233M
      if (ix == 0 && iy == 0) continue;
434
219M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
219M
    }
436
58.4M
  }
437
14.6M
  ComputeScaledIDCT<4, 4>()(
438
14.6M
      block,
439
14.6M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
14.6M
            pixels_stride),
441
14.6M
      scratch_space);
442
  // IDCT4x8.
443
14.6M
  block[0] = dcs[2];
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      if (ix == 0 && iy == 0) continue;
447
452M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
452M
    }
449
58.4M
  }
450
14.6M
  ComputeScaledIDCT<4, 8>()(
451
14.6M
      block,
452
14.6M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
14.6M
      scratch_space);
454
14.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
317k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
317k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
317k
  size_t afv_x = afv_kind & 1;
404
317k
  size_t afv_y = afv_kind / 2;
405
317k
  float dcs[3] = {};
406
317k
  float block00 = coefficients[0];
407
317k
  float block01 = coefficients[1];
408
317k
  float block10 = coefficients[8];
409
317k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
317k
  dcs[1] = (block00 + block10 - block01);
411
317k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
317k
  HWY_ALIGN float coeff[4 * 4];
414
317k
  coeff[0] = dcs[0];
415
1.58M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.34M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.07M
      if (ix == 0 && iy == 0) continue;
418
4.75M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.75M
    }
420
1.26M
  }
421
317k
  HWY_ALIGN float block[4 * 8];
422
317k
  AFVIDCT4x4(coeff, block);
423
1.58M
  for (size_t iy = 0; iy < 4; iy++) {
424
6.34M
    for (size_t ix = 0; ix < 4; ix++) {
425
5.07M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
5.07M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
5.07M
    }
428
1.26M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
317k
  block[0] = dcs[1];
431
1.58M
  for (size_t iy = 0; iy < 4; iy++) {
432
6.34M
    for (size_t ix = 0; ix < 4; ix++) {
433
5.07M
      if (ix == 0 && iy == 0) continue;
434
4.75M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.75M
    }
436
1.26M
  }
437
317k
  ComputeScaledIDCT<4, 4>()(
438
317k
      block,
439
317k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
317k
            pixels_stride),
441
317k
      scratch_space);
442
  // IDCT4x8.
443
317k
  block[0] = dcs[2];
444
1.58M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.4M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.1M
      if (ix == 0 && iy == 0) continue;
447
9.83M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
9.83M
    }
449
1.26M
  }
450
317k
  ComputeScaledIDCT<4, 8>()(
451
317k
      block,
452
317k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
317k
      scratch_space);
454
317k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
188k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
188k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
188k
  size_t afv_x = afv_kind & 1;
404
188k
  size_t afv_y = afv_kind / 2;
405
188k
  float dcs[3] = {};
406
188k
  float block00 = coefficients[0];
407
188k
  float block01 = coefficients[1];
408
188k
  float block10 = coefficients[8];
409
188k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
188k
  dcs[1] = (block00 + block10 - block01);
411
188k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
188k
  HWY_ALIGN float coeff[4 * 4];
414
188k
  coeff[0] = dcs[0];
415
940k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.76M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.01M
      if (ix == 0 && iy == 0) continue;
418
2.82M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.82M
    }
420
752k
  }
421
188k
  HWY_ALIGN float block[4 * 8];
422
188k
  AFVIDCT4x4(coeff, block);
423
940k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.76M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.01M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.01M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.01M
    }
428
752k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
188k
  block[0] = dcs[1];
431
940k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.76M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.01M
      if (ix == 0 && iy == 0) continue;
434
2.82M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.82M
    }
436
752k
  }
437
188k
  ComputeScaledIDCT<4, 4>()(
438
188k
      block,
439
188k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
188k
            pixels_stride),
441
188k
      scratch_space);
442
  // IDCT4x8.
443
188k
  block[0] = dcs[2];
444
940k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.77M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.02M
      if (ix == 0 && iy == 0) continue;
447
5.83M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
5.83M
    }
449
752k
  }
450
188k
  ComputeScaledIDCT<4, 8>()(
451
188k
      block,
452
188k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
188k
      scratch_space);
454
188k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
228k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
228k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
228k
  size_t afv_x = afv_kind & 1;
404
228k
  size_t afv_y = afv_kind / 2;
405
228k
  float dcs[3] = {};
406
228k
  float block00 = coefficients[0];
407
228k
  float block01 = coefficients[1];
408
228k
  float block10 = coefficients[8];
409
228k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
228k
  dcs[1] = (block00 + block10 - block01);
411
228k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
228k
  HWY_ALIGN float coeff[4 * 4];
414
228k
  coeff[0] = dcs[0];
415
1.14M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.57M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.66M
      if (ix == 0 && iy == 0) continue;
418
3.43M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.43M
    }
420
915k
  }
421
228k
  HWY_ALIGN float block[4 * 8];
422
228k
  AFVIDCT4x4(coeff, block);
423
1.14M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.57M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.66M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.66M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.66M
    }
428
915k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
228k
  block[0] = dcs[1];
431
1.14M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.57M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.66M
      if (ix == 0 && iy == 0) continue;
434
3.43M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.43M
    }
436
915k
  }
437
228k
  ComputeScaledIDCT<4, 4>()(
438
228k
      block,
439
228k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
228k
            pixels_stride),
441
228k
      scratch_space);
442
  // IDCT4x8.
443
228k
  block[0] = dcs[2];
444
1.14M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.24M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.32M
      if (ix == 0 && iy == 0) continue;
447
7.09M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.09M
    }
449
915k
  }
450
228k
  ComputeScaledIDCT<4, 8>()(
451
228k
      block,
452
228k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
228k
      scratch_space);
454
228k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
251k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
251k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
251k
  size_t afv_x = afv_kind & 1;
404
251k
  size_t afv_y = afv_kind / 2;
405
251k
  float dcs[3] = {};
406
251k
  float block00 = coefficients[0];
407
251k
  float block01 = coefficients[1];
408
251k
  float block10 = coefficients[8];
409
251k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
251k
  dcs[1] = (block00 + block10 - block01);
411
251k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
251k
  HWY_ALIGN float coeff[4 * 4];
414
251k
  coeff[0] = dcs[0];
415
1.25M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.02M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.01M
      if (ix == 0 && iy == 0) continue;
418
3.76M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.76M
    }
420
1.00M
  }
421
251k
  HWY_ALIGN float block[4 * 8];
422
251k
  AFVIDCT4x4(coeff, block);
423
1.25M
  for (size_t iy = 0; iy < 4; iy++) {
424
5.02M
    for (size_t ix = 0; ix < 4; ix++) {
425
4.01M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
4.01M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
4.01M
    }
428
1.00M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
251k
  block[0] = dcs[1];
431
1.25M
  for (size_t iy = 0; iy < 4; iy++) {
432
5.02M
    for (size_t ix = 0; ix < 4; ix++) {
433
4.01M
      if (ix == 0 && iy == 0) continue;
434
3.76M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.76M
    }
436
1.00M
  }
437
251k
  ComputeScaledIDCT<4, 4>()(
438
251k
      block,
439
251k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
251k
            pixels_stride),
441
251k
      scratch_space);
442
  // IDCT4x8.
443
251k
  block[0] = dcs[2];
444
1.25M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.04M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.03M
      if (ix == 0 && iy == 0) continue;
447
7.78M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.78M
    }
449
1.00M
  }
450
251k
  ComputeScaledIDCT<4, 8>()(
451
251k
      block,
452
251k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
251k
      scratch_space);
454
251k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
202M
                                        float* scratch_space) {
461
202M
  using Type = AcStrategyType;
462
202M
  switch (strategy) {
463
18.1M
    case Type::IDENTITY: {
464
18.1M
      float dcs[4] = {};
465
18.1M
      float block00 = coefficients[0];
466
18.1M
      float block01 = coefficients[1];
467
18.1M
      float block10 = coefficients[8];
468
18.1M
      float block11 = coefficients[9];
469
18.1M
      dcs[0] = block00 + block01 + block10 + block11;
470
18.1M
      dcs[1] = block00 + block01 - block10 - block11;
471
18.1M
      dcs[2] = block00 - block01 + block10 - block11;
472
18.1M
      dcs[3] = block00 - block01 - block10 + block11;
473
54.3M
      for (size_t y = 0; y < 2; y++) {
474
108M
        for (size_t x = 0; x < 2; x++) {
475
72.5M
          float block_dc = dcs[y * 2 + x];
476
72.5M
          float residual_sum = 0;
477
362M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.45G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.16G
              if (ix == 0 && iy == 0) continue;
480
1.08G
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.08G
            }
482
290M
          }
483
72.5M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
72.5M
              block_dc - residual_sum * (1.0f / 16);
485
362M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.45G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.16G
              if (ix == 1 && iy == 1) continue;
488
1.08G
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.08G
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.08G
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.08G
            }
492
290M
          }
493
72.5M
          pixels[y * 4 * pixels_stride + x * 4] =
494
72.5M
              coefficients[(y + 2) * 8 + x + 2] +
495
72.5M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
72.5M
        }
497
36.2M
      }
498
18.1M
      break;
499
0
    }
500
15.1M
    case Type::DCT8X4: {
501
15.1M
      float dcs[2] = {};
502
15.1M
      float block0 = coefficients[0];
503
15.1M
      float block1 = coefficients[8];
504
15.1M
      dcs[0] = block0 + block1;
505
15.1M
      dcs[1] = block0 - block1;
506
45.3M
      for (size_t x = 0; x < 2; x++) {
507
30.2M
        HWY_ALIGN float block[4 * 8];
508
30.2M
        block[0] = dcs[x];
509
151M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.08G
          for (size_t ix = 0; ix < 8; ix++) {
511
966M
            if (ix == 0 && iy == 0) continue;
512
936M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
936M
          }
514
120M
        }
515
30.2M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
30.2M
                                  scratch_space);
517
30.2M
      }
518
15.1M
      break;
519
0
    }
520
14.8M
    case Type::DCT4X8: {
521
14.8M
      float dcs[2] = {};
522
14.8M
      float block0 = coefficients[0];
523
14.8M
      float block1 = coefficients[8];
524
14.8M
      dcs[0] = block0 + block1;
525
14.8M
      dcs[1] = block0 - block1;
526
44.5M
      for (size_t y = 0; y < 2; y++) {
527
29.6M
        HWY_ALIGN float block[4 * 8];
528
29.6M
        block[0] = dcs[y];
529
148M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.06G
          for (size_t ix = 0; ix < 8; ix++) {
531
949M
            if (ix == 0 && iy == 0) continue;
532
920M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
920M
          }
534
118M
        }
535
29.6M
        ComputeScaledIDCT<4, 8>()(
536
29.6M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
29.6M
            scratch_space);
538
29.6M
      }
539
14.8M
      break;
540
0
    }
541
14.6M
    case Type::DCT4X4: {
542
14.6M
      float dcs[4] = {};
543
14.6M
      float block00 = coefficients[0];
544
14.6M
      float block01 = coefficients[1];
545
14.6M
      float block10 = coefficients[8];
546
14.6M
      float block11 = coefficients[9];
547
14.6M
      dcs[0] = block00 + block01 + block10 + block11;
548
14.6M
      dcs[1] = block00 + block01 - block10 - block11;
549
14.6M
      dcs[2] = block00 - block01 + block10 - block11;
550
14.6M
      dcs[3] = block00 - block01 - block10 + block11;
551
43.8M
      for (size_t y = 0; y < 2; y++) {
552
87.6M
        for (size_t x = 0; x < 2; x++) {
553
58.4M
          HWY_ALIGN float block[4 * 4];
554
58.4M
          block[0] = dcs[y * 2 + x];
555
292M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
557
935M
              if (ix == 0 && iy == 0) continue;
558
876M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
876M
            }
560
233M
          }
561
58.4M
          ComputeScaledIDCT<4, 4>()(
562
58.4M
              block,
563
58.4M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
58.4M
              scratch_space);
565
58.4M
        }
566
29.2M
      }
567
14.6M
      break;
568
0
    }
569
21.3M
    case Type::DCT2X2: {
570
21.3M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
21.3M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
21.3M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
21.3M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
21.3M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
192M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.53G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.36G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.36G
        }
579
170M
      }
580
21.3M
      break;
581
0
    }
582
6.39M
    case Type::DCT16X16: {
583
6.39M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
6.39M
                                  scratch_space);
585
6.39M
      break;
586
0
    }
587
12.2M
    case Type::DCT16X8: {
588
12.2M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
12.2M
                                 scratch_space);
590
12.2M
      break;
591
0
    }
592
12.3M
    case Type::DCT8X16: {
593
12.3M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
12.3M
                                 scratch_space);
595
12.3M
      break;
596
0
    }
597
42
    case Type::DCT32X8: {
598
42
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
42
                                 scratch_space);
600
42
      break;
601
0
    }
602
144
    case Type::DCT8X32: {
603
144
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
144
                                 scratch_space);
605
144
      break;
606
0
    }
607
2.48M
    case Type::DCT32X16: {
608
2.48M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.48M
                                  scratch_space);
610
2.48M
      break;
611
0
    }
612
2.46M
    case Type::DCT16X32: {
613
2.46M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.46M
                                  scratch_space);
615
2.46M
      break;
616
0
    }
617
1.49M
    case Type::DCT32X32: {
618
1.49M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.49M
                                  scratch_space);
620
1.49M
      break;
621
0
    }
622
20.4M
    case Type::DCT: {
623
20.4M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
20.4M
                                scratch_space);
625
20.4M
      break;
626
0
    }
627
14.9M
    case Type::AFV0: {
628
14.9M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
14.9M
      break;
630
0
    }
631
14.7M
    case Type::AFV1: {
632
14.7M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
14.7M
      break;
634
0
    }
635
14.8M
    case Type::AFV2: {
636
14.8M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
14.8M
      break;
638
0
    }
639
14.8M
    case Type::AFV3: {
640
14.8M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
14.8M
      break;
642
0
    }
643
723k
    case Type::DCT64X32: {
644
723k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
723k
                                  scratch_space);
646
723k
      break;
647
0
    }
648
444k
    case Type::DCT32X64: {
649
444k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
444k
                                  scratch_space);
651
444k
      break;
652
0
    }
653
393k
    case Type::DCT64X64: {
654
393k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
393k
                                  scratch_space);
656
393k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
18
    case Type::DCT128X128: {
669
18
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
18
                                    scratch_space);
671
18
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
202M
  }
689
202M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
182M
                                        float* scratch_space) {
461
182M
  using Type = AcStrategyType;
462
182M
  switch (strategy) {
463
14.6M
    case Type::IDENTITY: {
464
14.6M
      float dcs[4] = {};
465
14.6M
      float block00 = coefficients[0];
466
14.6M
      float block01 = coefficients[1];
467
14.6M
      float block10 = coefficients[8];
468
14.6M
      float block11 = coefficients[9];
469
14.6M
      dcs[0] = block00 + block01 + block10 + block11;
470
14.6M
      dcs[1] = block00 + block01 - block10 - block11;
471
14.6M
      dcs[2] = block00 - block01 + block10 - block11;
472
14.6M
      dcs[3] = block00 - block01 - block10 + block11;
473
43.8M
      for (size_t y = 0; y < 2; y++) {
474
87.6M
        for (size_t x = 0; x < 2; x++) {
475
58.4M
          float block_dc = dcs[y * 2 + x];
476
58.4M
          float residual_sum = 0;
477
292M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
479
934M
              if (ix == 0 && iy == 0) continue;
480
876M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
876M
            }
482
233M
          }
483
58.4M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
58.4M
              block_dc - residual_sum * (1.0f / 16);
485
292M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
487
934M
              if (ix == 1 && iy == 1) continue;
488
876M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
876M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
876M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
876M
            }
492
233M
          }
493
58.4M
          pixels[y * 4 * pixels_stride + x * 4] =
494
58.4M
              coefficients[(y + 2) * 8 + x + 2] +
495
58.4M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
58.4M
        }
497
29.2M
      }
498
14.6M
      break;
499
0
    }
500
14.6M
    case Type::DCT8X4: {
501
14.6M
      float dcs[2] = {};
502
14.6M
      float block0 = coefficients[0];
503
14.6M
      float block1 = coefficients[8];
504
14.6M
      dcs[0] = block0 + block1;
505
14.6M
      dcs[1] = block0 - block1;
506
43.8M
      for (size_t x = 0; x < 2; x++) {
507
29.2M
        HWY_ALIGN float block[4 * 8];
508
29.2M
        block[0] = dcs[x];
509
146M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.05G
          for (size_t ix = 0; ix < 8; ix++) {
511
934M
            if (ix == 0 && iy == 0) continue;
512
905M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
905M
          }
514
116M
        }
515
29.2M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
29.2M
                                  scratch_space);
517
29.2M
      }
518
14.6M
      break;
519
0
    }
520
14.6M
    case Type::DCT4X8: {
521
14.6M
      float dcs[2] = {};
522
14.6M
      float block0 = coefficients[0];
523
14.6M
      float block1 = coefficients[8];
524
14.6M
      dcs[0] = block0 + block1;
525
14.6M
      dcs[1] = block0 - block1;
526
43.8M
      for (size_t y = 0; y < 2; y++) {
527
29.2M
        HWY_ALIGN float block[4 * 8];
528
29.2M
        block[0] = dcs[y];
529
146M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.05G
          for (size_t ix = 0; ix < 8; ix++) {
531
934M
            if (ix == 0 && iy == 0) continue;
532
905M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
905M
          }
534
116M
        }
535
29.2M
        ComputeScaledIDCT<4, 8>()(
536
29.2M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
29.2M
            scratch_space);
538
29.2M
      }
539
14.6M
      break;
540
0
    }
541
14.6M
    case Type::DCT4X4: {
542
14.6M
      float dcs[4] = {};
543
14.6M
      float block00 = coefficients[0];
544
14.6M
      float block01 = coefficients[1];
545
14.6M
      float block10 = coefficients[8];
546
14.6M
      float block11 = coefficients[9];
547
14.6M
      dcs[0] = block00 + block01 + block10 + block11;
548
14.6M
      dcs[1] = block00 + block01 - block10 - block11;
549
14.6M
      dcs[2] = block00 - block01 + block10 - block11;
550
14.6M
      dcs[3] = block00 - block01 - block10 + block11;
551
43.8M
      for (size_t y = 0; y < 2; y++) {
552
87.6M
        for (size_t x = 0; x < 2; x++) {
553
58.4M
          HWY_ALIGN float block[4 * 4];
554
58.4M
          block[0] = dcs[y * 2 + x];
555
292M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
557
934M
              if (ix == 0 && iy == 0) continue;
558
876M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
876M
            }
560
233M
          }
561
58.4M
          ComputeScaledIDCT<4, 4>()(
562
58.4M
              block,
563
58.4M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
58.4M
              scratch_space);
565
58.4M
        }
566
29.2M
      }
567
14.6M
      break;
568
0
    }
569
14.6M
    case Type::DCT2X2: {
570
14.6M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
14.6M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
14.6M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
14.6M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
14.6M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
131M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.05G
        for (size_t x = 0; x < kBlockDim; x++) {
577
934M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
934M
        }
579
116M
      }
580
14.6M
      break;
581
0
    }
582
5.83M
    case Type::DCT16X16: {
583
5.83M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.83M
                                  scratch_space);
585
5.83M
      break;
586
0
    }
587
11.5M
    case Type::DCT16X8: {
588
11.5M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
11.5M
                                 scratch_space);
590
11.5M
      break;
591
0
    }
592
11.5M
    case Type::DCT8X16: {
593
11.5M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
11.5M
                                 scratch_space);
595
11.5M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
2.30M
    case Type::DCT32X16: {
608
2.30M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.30M
                                  scratch_space);
610
2.30M
      break;
611
0
    }
612
2.28M
    case Type::DCT16X32: {
613
2.28M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.28M
                                  scratch_space);
615
2.28M
      break;
616
0
    }
617
1.16M
    case Type::DCT32X32: {
618
1.16M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.16M
                                  scratch_space);
620
1.16M
      break;
621
0
    }
622
14.6M
    case Type::DCT: {
623
14.6M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
14.6M
                                scratch_space);
625
14.6M
      break;
626
0
    }
627
14.6M
    case Type::AFV0: {
628
14.6M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
14.6M
      break;
630
0
    }
631
14.6M
    case Type::AFV1: {
632
14.6M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
14.6M
      break;
634
0
    }
635
14.6M
    case Type::AFV2: {
636
14.6M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
14.6M
      break;
638
0
    }
639
14.6M
    case Type::AFV3: {
640
14.6M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
14.6M
      break;
642
0
    }
643
700k
    case Type::DCT64X32: {
644
700k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
700k
                                  scratch_space);
646
700k
      break;
647
0
    }
648
424k
    case Type::DCT32X64: {
649
424k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
424k
                                  scratch_space);
651
424k
      break;
652
0
    }
653
202k
    case Type::DCT64X64: {
654
202k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
202k
                                  scratch_space);
656
202k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
182M
  }
689
182M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
20.8M
                                        float* scratch_space) {
461
20.8M
  using Type = AcStrategyType;
462
20.8M
  switch (strategy) {
463
3.52M
    case Type::IDENTITY: {
464
3.52M
      float dcs[4] = {};
465
3.52M
      float block00 = coefficients[0];
466
3.52M
      float block01 = coefficients[1];
467
3.52M
      float block10 = coefficients[8];
468
3.52M
      float block11 = coefficients[9];
469
3.52M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.52M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.52M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.52M
      dcs[3] = block00 - block01 - block10 + block11;
473
10.5M
      for (size_t y = 0; y < 2; y++) {
474
21.1M
        for (size_t x = 0; x < 2; x++) {
475
14.0M
          float block_dc = dcs[y * 2 + x];
476
14.0M
          float residual_sum = 0;
477
70.4M
          for (size_t iy = 0; iy < 4; iy++) {
478
281M
            for (size_t ix = 0; ix < 4; ix++) {
479
225M
              if (ix == 0 && iy == 0) continue;
480
211M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
211M
            }
482
56.3M
          }
483
14.0M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
14.0M
              block_dc - residual_sum * (1.0f / 16);
485
70.4M
          for (size_t iy = 0; iy < 4; iy++) {
486
281M
            for (size_t ix = 0; ix < 4; ix++) {
487
225M
              if (ix == 1 && iy == 1) continue;
488
211M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
211M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
211M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
211M
            }
492
56.3M
          }
493
14.0M
          pixels[y * 4 * pixels_stride + x * 4] =
494
14.0M
              coefficients[(y + 2) * 8 + x + 2] +
495
14.0M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
14.0M
        }
497
7.04M
      }
498
3.52M
      break;
499
0
    }
500
495k
    case Type::DCT8X4: {
501
495k
      float dcs[2] = {};
502
495k
      float block0 = coefficients[0];
503
495k
      float block1 = coefficients[8];
504
495k
      dcs[0] = block0 + block1;
505
495k
      dcs[1] = block0 - block1;
506
1.48M
      for (size_t x = 0; x < 2; x++) {
507
991k
        HWY_ALIGN float block[4 * 8];
508
991k
        block[0] = dcs[x];
509
4.95M
        for (size_t iy = 0; iy < 4; iy++) {
510
35.7M
          for (size_t ix = 0; ix < 8; ix++) {
511
31.7M
            if (ix == 0 && iy == 0) continue;
512
30.7M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
30.7M
          }
514
3.96M
        }
515
991k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
991k
                                  scratch_space);
517
991k
      }
518
495k
      break;
519
0
    }
520
231k
    case Type::DCT4X8: {
521
231k
      float dcs[2] = {};
522
231k
      float block0 = coefficients[0];
523
231k
      float block1 = coefficients[8];
524
231k
      dcs[0] = block0 + block1;
525
231k
      dcs[1] = block0 - block1;
526
695k
      for (size_t y = 0; y < 2; y++) {
527
463k
        HWY_ALIGN float block[4 * 8];
528
463k
        block[0] = dcs[y];
529
2.31M
        for (size_t iy = 0; iy < 4; iy++) {
530
16.7M
          for (size_t ix = 0; ix < 8; ix++) {
531
14.8M
            if (ix == 0 && iy == 0) continue;
532
14.3M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
14.3M
          }
534
1.85M
        }
535
463k
        ComputeScaledIDCT<4, 8>()(
536
463k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
463k
            scratch_space);
538
463k
      }
539
231k
      break;
540
0
    }
541
4.35k
    case Type::DCT4X4: {
542
4.35k
      float dcs[4] = {};
543
4.35k
      float block00 = coefficients[0];
544
4.35k
      float block01 = coefficients[1];
545
4.35k
      float block10 = coefficients[8];
546
4.35k
      float block11 = coefficients[9];
547
4.35k
      dcs[0] = block00 + block01 + block10 + block11;
548
4.35k
      dcs[1] = block00 + block01 - block10 - block11;
549
4.35k
      dcs[2] = block00 - block01 + block10 - block11;
550
4.35k
      dcs[3] = block00 - block01 - block10 + block11;
551
13.0k
      for (size_t y = 0; y < 2; y++) {
552
26.1k
        for (size_t x = 0; x < 2; x++) {
553
17.4k
          HWY_ALIGN float block[4 * 4];
554
17.4k
          block[0] = dcs[y * 2 + x];
555
87.0k
          for (size_t iy = 0; iy < 4; iy++) {
556
348k
            for (size_t ix = 0; ix < 4; ix++) {
557
278k
              if (ix == 0 && iy == 0) continue;
558
261k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
261k
            }
560
69.6k
          }
561
17.4k
          ComputeScaledIDCT<4, 4>()(
562
17.4k
              block,
563
17.4k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
17.4k
              scratch_space);
565
17.4k
        }
566
8.70k
      }
567
4.35k
      break;
568
0
    }
569
6.75M
    case Type::DCT2X2: {
570
6.75M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
6.75M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
6.75M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
6.75M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
6.75M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
60.7M
      for (size_t y = 0; y < kBlockDim; y++) {
576
486M
        for (size_t x = 0; x < kBlockDim; x++) {
577
432M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
432M
        }
579
54.0M
      }
580
6.75M
      break;
581
0
    }
582
554k
    case Type::DCT16X16: {
583
554k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
554k
                                  scratch_space);
585
554k
      break;
586
0
    }
587
750k
    case Type::DCT16X8: {
588
750k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
750k
                                 scratch_space);
590
750k
      break;
591
0
    }
592
800k
    case Type::DCT8X16: {
593
800k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
800k
                                 scratch_space);
595
800k
      break;
596
0
    }
597
42
    case Type::DCT32X8: {
598
42
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
42
                                 scratch_space);
600
42
      break;
601
0
    }
602
144
    case Type::DCT8X32: {
603
144
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
144
                                 scratch_space);
605
144
      break;
606
0
    }
607
174k
    case Type::DCT32X16: {
608
174k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
174k
                                  scratch_space);
610
174k
      break;
611
0
    }
612
182k
    case Type::DCT16X32: {
613
182k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
182k
                                  scratch_space);
615
182k
      break;
616
0
    }
617
330k
    case Type::DCT32X32: {
618
330k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
330k
                                  scratch_space);
620
330k
      break;
621
0
    }
622
5.81M
    case Type::DCT: {
623
5.81M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
5.81M
                                scratch_space);
625
5.81M
      break;
626
0
    }
627
317k
    case Type::AFV0: {
628
317k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
317k
      break;
630
0
    }
631
188k
    case Type::AFV1: {
632
188k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
188k
      break;
634
0
    }
635
228k
    case Type::AFV2: {
636
228k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
228k
      break;
638
0
    }
639
251k
    case Type::AFV3: {
640
251k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
251k
      break;
642
0
    }
643
23.5k
    case Type::DCT64X32: {
644
23.5k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
23.5k
                                  scratch_space);
646
23.5k
      break;
647
0
    }
648
19.9k
    case Type::DCT32X64: {
649
19.9k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
19.9k
                                  scratch_space);
651
19.9k
      break;
652
0
    }
653
190k
    case Type::DCT64X64: {
654
190k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
190k
                                  scratch_space);
656
190k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
18
    case Type::DCT128X128: {
669
18
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
18
                                    scratch_space);
671
18
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
20.8M
  }
689
20.8M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
21.1M
                                              float* JXL_RESTRICT scratch) {
695
21.1M
  using Type = AcStrategyType;
696
21.1M
  HWY_ALIGN float warm_block[4 * 4];
697
21.1M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
21.1M
  switch (strategy) {
699
750k
    case Type::DCT16X8: {
700
750k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
750k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
750k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
750k
      break;
704
0
    }
705
800k
    case Type::DCT8X16: {
706
800k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
800k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
800k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
800k
      break;
710
0
    }
711
554k
    case Type::DCT16X16: {
712
554k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
554k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
554k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
554k
      break;
716
0
    }
717
42
    case Type::DCT32X8: {
718
42
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
42
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
42
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
42
      break;
722
0
    }
723
144
    case Type::DCT8X32: {
724
144
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
144
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
144
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
144
      break;
728
0
    }
729
174k
    case Type::DCT32X16: {
730
174k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
174k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
174k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
174k
      break;
734
0
    }
735
182k
    case Type::DCT16X32: {
736
182k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
182k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
182k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
182k
      break;
740
0
    }
741
330k
    case Type::DCT32X32: {
742
330k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
330k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
330k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
330k
      break;
746
0
    }
747
23.5k
    case Type::DCT64X32: {
748
23.5k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
23.5k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
23.5k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
23.5k
      break;
752
0
    }
753
19.9k
    case Type::DCT32X64: {
754
19.9k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.9k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.9k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.9k
      break;
758
0
    }
759
190k
    case Type::DCT64X64: {
760
190k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
190k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
190k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
190k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
18
    case Type::DCT128X128: {
778
18
      ReinterpretingDCT<
779
18
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
18
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
18
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
18
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.84M
    case Type::DCT:
806
12.6M
    case Type::DCT2X2:
807
12.6M
    case Type::DCT4X4:
808
12.8M
    case Type::DCT4X8:
809
13.3M
    case Type::DCT8X4:
810
13.6M
    case Type::AFV0:
811
13.8M
    case Type::AFV1:
812
14.0M
    case Type::AFV2:
813
14.3M
    case Type::AFV3:
814
18.1M
    case Type::IDENTITY:
815
18.1M
      llf[0] = dc[0];
816
18.1M
      break;
817
21.1M
  };
818
21.1M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
21.1M
                                              float* JXL_RESTRICT scratch) {
695
21.1M
  using Type = AcStrategyType;
696
21.1M
  HWY_ALIGN float warm_block[4 * 4];
697
21.1M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
21.1M
  switch (strategy) {
699
750k
    case Type::DCT16X8: {
700
750k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
750k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
750k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
750k
      break;
704
0
    }
705
800k
    case Type::DCT8X16: {
706
800k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
800k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
800k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
800k
      break;
710
0
    }
711
554k
    case Type::DCT16X16: {
712
554k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
554k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
554k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
554k
      break;
716
0
    }
717
42
    case Type::DCT32X8: {
718
42
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
42
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
42
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
42
      break;
722
0
    }
723
144
    case Type::DCT8X32: {
724
144
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
144
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
144
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
144
      break;
728
0
    }
729
174k
    case Type::DCT32X16: {
730
174k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
174k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
174k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
174k
      break;
734
0
    }
735
182k
    case Type::DCT16X32: {
736
182k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
182k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
182k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
182k
      break;
740
0
    }
741
330k
    case Type::DCT32X32: {
742
330k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
330k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
330k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
330k
      break;
746
0
    }
747
23.5k
    case Type::DCT64X32: {
748
23.5k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
23.5k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
23.5k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
23.5k
      break;
752
0
    }
753
19.9k
    case Type::DCT32X64: {
754
19.9k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.9k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.9k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.9k
      break;
758
0
    }
759
190k
    case Type::DCT64X64: {
760
190k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
190k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
190k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
190k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
18
    case Type::DCT128X128: {
778
18
      ReinterpretingDCT<
779
18
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
18
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
18
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
18
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.84M
    case Type::DCT:
806
12.6M
    case Type::DCT2X2:
807
12.6M
    case Type::DCT4X4:
808
12.8M
    case Type::DCT4X8:
809
13.3M
    case Type::DCT8X4:
810
13.6M
    case Type::AFV0:
811
13.8M
    case Type::AFV1:
812
14.0M
    case Type::AFV2:
813
14.3M
    case Type::AFV3:
814
18.1M
    case Type::IDENTITY:
815
18.1M
      llf[0] = dc[0];
816
18.1M
      break;
817
21.1M
  };
818
21.1M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_