Coverage Report

Created: 2026-05-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
//
// Parameters:
//   input, input_stride:   source data, wrapped with DCTFrom() and handed to
//                          ComputeScaledDCT<ROWS, COLS>.
//   output, output_stride: destination; element (y, x) is written to
//                          output[y * output_stride + x].
//   block:                 receives the ComputeScaledDCT result and is then
//                          read back with a dense row stride (COLS or ROWS,
//                          depending on the branch taken below).
//   scratch_space:         forwarded unchanged to ComputeScaledDCT.
// NOTE(review): the required sizes of `block` and `scratch_space` are not
// visible in this function; confirm against ComputeScaledDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.82M
                                  float* JXL_RESTRICT scratch_space) {
41
2.82M
  // Only instantiations where the LF region has exactly the dimensions of the
  // DCT being computed (LF_ROWS == ROWS and LF_COLS == COLS) are accepted.
  static_assert(LF_ROWS == ROWS,
42
2.82M
                "ReinterpretingDCT should only be called with LF == N");
43
2.82M
  static_assert(LF_COLS == COLS,
44
2.82M
                "ReinterpretingDCT should only be called with LF == N");
45
2.82M
  // Run the ROWS x COLS DCT on the input; the coefficients land in `block`.
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.82M
                                 scratch_space);
47
2.82M
  // ROWS and COLS are template parameters, so this condition is a
  // compile-time constant for each instantiation.
  if (ROWS < COLS) {
48
2.12M
    // `block` is read with row stride COLS. The row index y is rescaled with
    // the ROWS -> DCT_ROWS factor and the column index x with the
    // COLS -> DCT_COLS factor.
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.68M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.50M
        output[y * output_stride + x] =
51
3.50M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.50M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.50M
      }
54
1.17M
    }
55
1.87M
  } else {
56
6.54M
    // ROWS >= COLS: loop bounds, the `block` row stride and the scale factors
    // all swap roles (y is paired with COLS, x with ROWS).
    // NOTE(review): presumably ComputeScaledDCT stores its result transposed
    // in this case -- confirm in dct-inl.h.
    for (size_t y = 0; y < LF_COLS; y++) {
57
25.5M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
20.8M
        output[y * output_stride + x] =
59
20.8M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
20.8M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
20.8M
      }
62
4.67M
    }
63
1.87M
  }
64
2.82M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
704k
                                  float* JXL_RESTRICT scratch_space) {
41
704k
  static_assert(LF_ROWS == ROWS,
42
704k
                "ReinterpretingDCT should only be called with LF == N");
43
704k
  static_assert(LF_COLS == COLS,
44
704k
                "ReinterpretingDCT should only be called with LF == N");
45
704k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
704k
                                 scratch_space);
47
704k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
704k
  } else {
56
1.40M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.11M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.40M
        output[y * output_stride + x] =
59
1.40M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.40M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.40M
      }
62
704k
    }
63
704k
  }
64
704k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
757k
                                  float* JXL_RESTRICT scratch_space) {
41
757k
  static_assert(LF_ROWS == ROWS,
42
757k
                "ReinterpretingDCT should only be called with LF == N");
43
757k
  static_assert(LF_COLS == COLS,
44
757k
                "ReinterpretingDCT should only be called with LF == N");
45
757k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
757k
                                 scratch_space);
47
757k
  if (ROWS < COLS) {
48
1.51M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.27M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.51M
        output[y * output_stride + x] =
51
1.51M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.51M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.51M
      }
54
757k
    }
55
757k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
757k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
517k
                                  float* JXL_RESTRICT scratch_space) {
41
517k
  static_assert(LF_ROWS == ROWS,
42
517k
                "ReinterpretingDCT should only be called with LF == N");
43
517k
  static_assert(LF_COLS == COLS,
44
517k
                "ReinterpretingDCT should only be called with LF == N");
45
517k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
517k
                                 scratch_space);
47
517k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
517k
  } else {
56
1.55M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.10M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.07M
        output[y * output_stride + x] =
59
2.07M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.07M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.07M
      }
62
1.03M
    }
63
517k
  }
64
517k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
45
                                  float* JXL_RESTRICT scratch_space) {
41
45
  static_assert(LF_ROWS == ROWS,
42
45
                "ReinterpretingDCT should only be called with LF == N");
43
45
  static_assert(LF_COLS == COLS,
44
45
                "ReinterpretingDCT should only be called with LF == N");
45
45
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
45
                                 scratch_space);
47
45
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
45
  } else {
56
90
    for (size_t y = 0; y < LF_COLS; y++) {
57
225
      for (size_t x = 0; x < LF_ROWS; x++) {
58
180
        output[y * output_stride + x] =
59
180
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
180
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
180
      }
62
45
    }
63
45
  }
64
45
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
144
                                  float* JXL_RESTRICT scratch_space) {
41
144
  static_assert(LF_ROWS == ROWS,
42
144
                "ReinterpretingDCT should only be called with LF == N");
43
144
  static_assert(LF_COLS == COLS,
44
144
                "ReinterpretingDCT should only be called with LF == N");
45
144
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
144
                                 scratch_space);
47
144
  if (ROWS < COLS) {
48
288
    for (size_t y = 0; y < LF_ROWS; y++) {
49
720
      for (size_t x = 0; x < LF_COLS; x++) {
50
576
        output[y * output_stride + x] =
51
576
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
576
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
576
      }
54
144
    }
55
144
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
144
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
164k
                                  float* JXL_RESTRICT scratch_space) {
41
164k
  static_assert(LF_ROWS == ROWS,
42
164k
                "ReinterpretingDCT should only be called with LF == N");
43
164k
  static_assert(LF_COLS == COLS,
44
164k
                "ReinterpretingDCT should only be called with LF == N");
45
164k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
164k
                                 scratch_space);
47
164k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
164k
  } else {
56
494k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.64M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.31M
        output[y * output_stride + x] =
59
1.31M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.31M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.31M
      }
62
329k
    }
63
164k
  }
64
164k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
172k
                                  float* JXL_RESTRICT scratch_space) {
41
172k
  static_assert(LF_ROWS == ROWS,
42
172k
                "ReinterpretingDCT should only be called with LF == N");
43
172k
  static_assert(LF_COLS == COLS,
44
172k
                "ReinterpretingDCT should only be called with LF == N");
45
172k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
172k
                                 scratch_space);
47
172k
  if (ROWS < COLS) {
48
517k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.72M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.37M
        output[y * output_stride + x] =
51
1.37M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.37M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.37M
      }
54
344k
    }
55
172k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
172k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
295k
                                  float* JXL_RESTRICT scratch_space) {
41
295k
  static_assert(LF_ROWS == ROWS,
42
295k
                "ReinterpretingDCT should only be called with LF == N");
43
295k
  static_assert(LF_COLS == COLS,
44
295k
                "ReinterpretingDCT should only be called with LF == N");
45
295k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
295k
                                 scratch_space);
47
295k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
295k
  } else {
56
1.47M
    for (size_t y = 0; y < LF_COLS; y++) {
57
5.90M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.72M
        output[y * output_stride + x] =
59
4.72M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.72M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.72M
      }
62
1.18M
    }
63
295k
  }
64
295k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
22.4k
                                  float* JXL_RESTRICT scratch_space) {
41
22.4k
  static_assert(LF_ROWS == ROWS,
42
22.4k
                "ReinterpretingDCT should only be called with LF == N");
43
22.4k
  static_assert(LF_COLS == COLS,
44
22.4k
                "ReinterpretingDCT should only be called with LF == N");
45
22.4k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
22.4k
                                 scratch_space);
47
22.4k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
22.4k
  } else {
56
112k
    for (size_t y = 0; y < LF_COLS; y++) {
57
806k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
716k
        output[y * output_stride + x] =
59
716k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
716k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
716k
      }
62
89.6k
    }
63
22.4k
  }
64
22.4k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
19.0k
                                  float* JXL_RESTRICT scratch_space) {
41
19.0k
  static_assert(LF_ROWS == ROWS,
42
19.0k
                "ReinterpretingDCT should only be called with LF == N");
43
19.0k
  static_assert(LF_COLS == COLS,
44
19.0k
                "ReinterpretingDCT should only be called with LF == N");
45
19.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
19.0k
                                 scratch_space);
47
19.0k
  if (ROWS < COLS) {
48
95.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
686k
      for (size_t x = 0; x < LF_COLS; x++) {
50
610k
        output[y * output_stride + x] =
51
610k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
610k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
610k
      }
54
76.2k
    }
55
19.0k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
19.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
166k
                                  float* JXL_RESTRICT scratch_space) {
41
166k
  static_assert(LF_ROWS == ROWS,
42
166k
                "ReinterpretingDCT should only be called with LF == N");
43
166k
  static_assert(LF_COLS == COLS,
44
166k
                "ReinterpretingDCT should only be called with LF == N");
45
166k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
166k
                                 scratch_space);
47
166k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
166k
  } else {
56
1.49M
    for (size_t y = 0; y < LF_COLS; y++) {
57
11.9M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
10.6M
        output[y * output_stride + x] =
59
10.6M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
10.6M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
10.6M
      }
62
1.33M
    }
63
166k
  }
64
166k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
27
    for (size_t y = 0; y < LF_ROWS; y++) {
49
408
      for (size_t x = 0; x < LF_COLS; x++) {
50
384
        output[y * output_stride + x] =
51
384
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
384
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
384
      }
54
24
    }
55
3
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
24
                                  float* JXL_RESTRICT scratch_space) {
41
24
  static_assert(LF_ROWS == ROWS,
42
24
                "ReinterpretingDCT should only be called with LF == N");
43
24
  static_assert(LF_COLS == COLS,
44
24
                "ReinterpretingDCT should only be called with LF == N");
45
24
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
24
                                 scratch_space);
47
24
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
24
  } else {
56
408
    for (size_t y = 0; y < LF_COLS; y++) {
57
6.52k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.14k
        output[y * output_stride + x] =
59
6.14k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.14k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.14k
      }
62
384
    }
63
24
  }
64
24
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
58.2M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
58.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
58.2M
  static_assert(S % 2 == 0, "S should be even");
70
58.2M
  float temp[kDCTBlockSize];
71
58.2M
  constexpr size_t num_2x2 = S / 2;
72
194M
  for (size_t y = 0; y < num_2x2; y++) {
73
543M
    for (size_t x = 0; x < num_2x2; x++) {
74
407M
      float c00 = block[y * kBlockDim + x];
75
407M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
407M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
407M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
407M
      float r00 = c00 + c01 + c10 + c11;
79
407M
      float r01 = c00 + c01 - c10 - c11;
80
407M
      float r10 = c00 - c01 + c10 - c11;
81
407M
      float r11 = c00 - c01 - c10 + c11;
82
407M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
407M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
407M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
407M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
407M
    }
87
135M
  }
88
330M
  for (size_t y = 0; y < S; y++) {
89
1.90G
    for (size_t x = 0; x < S; x++) {
90
1.63G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.63G
    }
92
271M
  }
93
58.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
26.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
26.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.2M
      float c00 = block[y * kBlockDim + x];
75
13.2M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
13.2M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
13.2M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
13.2M
      float r00 = c00 + c01 + c10 + c11;
79
13.2M
      float r01 = c00 + c01 - c10 - c11;
80
13.2M
      float r10 = c00 - c01 + c10 - c11;
81
13.2M
      float r11 = c00 - c01 - c10 + c11;
82
13.2M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
13.2M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
13.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
13.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
13.2M
    }
87
13.2M
  }
88
39.6M
  for (size_t y = 0; y < S; y++) {
89
79.2M
    for (size_t x = 0; x < S; x++) {
90
52.8M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
52.8M
    }
92
26.4M
  }
93
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
39.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
79.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
52.8M
      float c00 = block[y * kBlockDim + x];
75
52.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
52.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
52.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
52.8M
      float r00 = c00 + c01 + c10 + c11;
79
52.8M
      float r01 = c00 + c01 - c10 - c11;
80
52.8M
      float r10 = c00 - c01 + c10 - c11;
81
52.8M
      float r11 = c00 - c01 - c10 + c11;
82
52.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
52.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
52.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
52.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
52.8M
    }
87
26.4M
  }
88
66.0M
  for (size_t y = 0; y < S; y++) {
89
264M
    for (size_t x = 0; x < S; x++) {
90
211M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
211M
    }
92
52.8M
  }
93
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.2M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.2M
  static_assert(S % 2 == 0, "S should be even");
70
13.2M
  float temp[kDCTBlockSize];
71
13.2M
  constexpr size_t num_2x2 = S / 2;
72
66.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
264M
    for (size_t x = 0; x < num_2x2; x++) {
74
211M
      float c00 = block[y * kBlockDim + x];
75
211M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
211M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
211M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
211M
      float r00 = c00 + c01 + c10 + c11;
79
211M
      float r01 = c00 + c01 - c10 - c11;
80
211M
      float r10 = c00 - c01 + c10 - c11;
81
211M
      float r11 = c00 - c01 - c10 + c11;
82
211M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
211M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
211M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
211M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
211M
    }
87
52.8M
  }
88
118M
  for (size_t y = 0; y < S; y++) {
89
950M
    for (size_t x = 0; x < S; x++) {
90
845M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
845M
    }
92
105M
  }
93
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.21M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.21M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.21M
  static_assert(S % 2 == 0, "S should be even");
70
6.21M
  float temp[kDCTBlockSize];
71
6.21M
  constexpr size_t num_2x2 = S / 2;
72
12.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
6.21M
      float c00 = block[y * kBlockDim + x];
75
6.21M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
6.21M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
6.21M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
6.21M
      float r00 = c00 + c01 + c10 + c11;
79
6.21M
      float r01 = c00 + c01 - c10 - c11;
80
6.21M
      float r10 = c00 - c01 + c10 - c11;
81
6.21M
      float r11 = c00 - c01 - c10 + c11;
82
6.21M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
6.21M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
6.21M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
6.21M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
6.21M
    }
87
6.21M
  }
88
18.6M
  for (size_t y = 0; y < S; y++) {
89
37.2M
    for (size_t x = 0; x < S; x++) {
90
24.8M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
24.8M
    }
92
12.4M
  }
93
6.21M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.21M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.21M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.21M
  static_assert(S % 2 == 0, "S should be even");
70
6.21M
  float temp[kDCTBlockSize];
71
6.21M
  constexpr size_t num_2x2 = S / 2;
72
18.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
24.8M
      float c00 = block[y * kBlockDim + x];
75
24.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
24.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
24.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
24.8M
      float r00 = c00 + c01 + c10 + c11;
79
24.8M
      float r01 = c00 + c01 - c10 - c11;
80
24.8M
      float r10 = c00 - c01 + c10 - c11;
81
24.8M
      float r11 = c00 - c01 - c10 + c11;
82
24.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
24.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
24.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
24.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
24.8M
    }
87
12.4M
  }
88
31.0M
  for (size_t y = 0; y < S; y++) {
89
124M
    for (size_t x = 0; x < S; x++) {
90
99.4M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
99.4M
    }
92
24.8M
  }
93
6.21M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.21M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.21M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.21M
  static_assert(S % 2 == 0, "S should be even");
70
6.21M
  float temp[kDCTBlockSize];
71
6.21M
  constexpr size_t num_2x2 = S / 2;
72
31.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
124M
    for (size_t x = 0; x < num_2x2; x++) {
74
99.4M
      float c00 = block[y * kBlockDim + x];
75
99.4M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
99.4M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
99.4M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
99.4M
      float r00 = c00 + c01 + c10 + c11;
79
99.4M
      float r01 = c00 + c01 - c10 - c11;
80
99.4M
      float r10 = c00 - c01 + c10 - c11;
81
99.4M
      float r11 = c00 - c01 - c10 + c11;
82
99.4M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
99.4M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
99.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
99.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
99.4M
    }
87
24.8M
  }
88
55.9M
  for (size_t y = 0; y < S; y++) {
89
447M
    for (size_t x = 0; x < S; x++) {
90
397M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
397M
    }
92
49.7M
  }
93
6.21M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
53.7M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
53.7M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
53.7M
      {
98
53.7M
          0.25,
99
53.7M
          0.25,
100
53.7M
          0.25,
101
53.7M
          0.25,
102
53.7M
          0.25,
103
53.7M
          0.25,
104
53.7M
          0.25,
105
53.7M
          0.25,
106
53.7M
          0.25,
107
53.7M
          0.25,
108
53.7M
          0.25,
109
53.7M
          0.25,
110
53.7M
          0.25,
111
53.7M
          0.25,
112
53.7M
          0.25,
113
53.7M
          0.25,
114
53.7M
      },
115
53.7M
      {
116
53.7M
          0.876902929799142f,
117
53.7M
          0.2206518106944235f,
118
53.7M
          -0.10140050393753763f,
119
53.7M
          -0.1014005039375375f,
120
53.7M
          0.2206518106944236f,
121
53.7M
          -0.10140050393753777f,
122
53.7M
          -0.10140050393753772f,
123
53.7M
          -0.10140050393753763f,
124
53.7M
          -0.10140050393753758f,
125
53.7M
          -0.10140050393753769f,
126
53.7M
          -0.1014005039375375f,
127
53.7M
          -0.10140050393753768f,
128
53.7M
          -0.10140050393753768f,
129
53.7M
          -0.10140050393753759f,
130
53.7M
          -0.10140050393753763f,
131
53.7M
          -0.10140050393753741f,
132
53.7M
      },
133
53.7M
      {
134
53.7M
          0.0,
135
53.7M
          0.0,
136
53.7M
          0.40670075830260755f,
137
53.7M
          0.44444816619734445f,
138
53.7M
          0.0,
139
53.7M
          0.0,
140
53.7M
          0.19574399372042936f,
141
53.7M
          0.2929100136981264f,
142
53.7M
          -0.40670075830260716f,
143
53.7M
          -0.19574399372042872f,
144
53.7M
          0.0,
145
53.7M
          0.11379074460448091f,
146
53.7M
          -0.44444816619734384f,
147
53.7M
          -0.29291001369812636f,
148
53.7M
          -0.1137907446044814f,
149
53.7M
          0.0,
150
53.7M
      },
151
53.7M
      {
152
53.7M
          0.0,
153
53.7M
          0.0,
154
53.7M
          -0.21255748058288748f,
155
53.7M
          0.3085497062849767f,
156
53.7M
          0.0,
157
53.7M
          0.4706702258572536f,
158
53.7M
          -0.1621205195722993f,
159
53.7M
          0.0,
160
53.7M
          -0.21255748058287047f,
161
53.7M
          -0.16212051957228327f,
162
53.7M
          -0.47067022585725277f,
163
53.7M
          -0.1464291867126764f,
164
53.7M
          0.3085497062849487f,
165
53.7M
          0.0,
166
53.7M
          -0.14642918671266536f,
167
53.7M
          0.4251149611657548f,
168
53.7M
      },
169
53.7M
      {
170
53.7M
          0.0,
171
53.7M
          -0.7071067811865474f,
172
53.7M
          0.0,
173
53.7M
          0.0,
174
53.7M
          0.7071067811865476f,
175
53.7M
          0.0,
176
53.7M
          0.0,
177
53.7M
          0.0,
178
53.7M
          0.0,
179
53.7M
          0.0,
180
53.7M
          0.0,
181
53.7M
          0.0,
182
53.7M
          0.0,
183
53.7M
          0.0,
184
53.7M
          0.0,
185
53.7M
          0.0,
186
53.7M
      },
187
53.7M
      {
188
53.7M
          -0.4105377591765233f,
189
53.7M
          0.6235485373547691f,
190
53.7M
          -0.06435071657946274f,
191
53.7M
          -0.06435071657946266f,
192
53.7M
          0.6235485373547694f,
193
53.7M
          -0.06435071657946284f,
194
53.7M
          -0.0643507165794628f,
195
53.7M
          -0.06435071657946274f,
196
53.7M
          -0.06435071657946272f,
197
53.7M
          -0.06435071657946279f,
198
53.7M
          -0.06435071657946266f,
199
53.7M
          -0.06435071657946277f,
200
53.7M
          -0.06435071657946277f,
201
53.7M
          -0.06435071657946273f,
202
53.7M
          -0.06435071657946274f,
203
53.7M
          -0.0643507165794626f,
204
53.7M
      },
205
53.7M
      {
206
53.7M
          0.0,
207
53.7M
          0.0,
208
53.7M
          -0.4517556589999482f,
209
53.7M
          0.15854503551840063f,
210
53.7M
          0.0,
211
53.7M
          -0.04038515160822202f,
212
53.7M
          0.0074182263792423875f,
213
53.7M
          0.39351034269210167f,
214
53.7M
          -0.45175565899994635f,
215
53.7M
          0.007418226379244351f,
216
53.7M
          0.1107416575309343f,
217
53.7M
          0.08298163094882051f,
218
53.7M
          0.15854503551839705f,
219
53.7M
          0.3935103426921022f,
220
53.7M
          0.0829816309488214f,
221
53.7M
          -0.45175565899994796f,
222
53.7M
      },
223
53.7M
      {
224
53.7M
          0.0,
225
53.7M
          0.0,
226
53.7M
          -0.304684750724869f,
227
53.7M
          0.5112616136591823f,
228
53.7M
          0.0,
229
53.7M
          0.0,
230
53.7M
          -0.290480129728998f,
231
53.7M
          -0.06578701549142804f,
232
53.7M
          0.304684750724884f,
233
53.7M
          0.2904801297290076f,
234
53.7M
          0.0,
235
53.7M
          -0.23889773523344604f,
236
53.7M
          -0.5112616136592012f,
237
53.7M
          0.06578701549142545f,
238
53.7M
          0.23889773523345467f,
239
53.7M
          0.0,
240
53.7M
      },
241
53.7M
      {
242
53.7M
          0.0,
243
53.7M
          0.0,
244
53.7M
          0.3017929516615495f,
245
53.7M
          0.25792362796341184f,
246
53.7M
          0.0,
247
53.7M
          0.16272340142866204f,
248
53.7M
          0.09520022653475037f,
249
53.7M
          0.0,
250
53.7M
          0.3017929516615503f,
251
53.7M
          0.09520022653475055f,
252
53.7M
          -0.16272340142866173f,
253
53.7M
          -0.35312385449816297f,
254
53.7M
          0.25792362796341295f,
255
53.7M
          0.0,
256
53.7M
          -0.3531238544981624f,
257
53.7M
          -0.6035859033230976f,
258
53.7M
      },
259
53.7M
      {
260
53.7M
          0.0,
261
53.7M
          0.0,
262
53.7M
          0.40824829046386274f,
263
53.7M
          0.0,
264
53.7M
          0.0,
265
53.7M
          0.0,
266
53.7M
          0.0,
267
53.7M
          -0.4082482904638628f,
268
53.7M
          -0.4082482904638635f,
269
53.7M
          0.0,
270
53.7M
          0.0,
271
53.7M
          -0.40824829046386296f,
272
53.7M
          0.0,
273
53.7M
          0.4082482904638634f,
274
53.7M
          0.408248290463863f,
275
53.7M
          0.0,
276
53.7M
      },
277
53.7M
      {
278
53.7M
          0.0,
279
53.7M
          0.0,
280
53.7M
          0.1747866975480809f,
281
53.7M
          0.0812611176717539f,
282
53.7M
          0.0,
283
53.7M
          0.0,
284
53.7M
          -0.3675398009862027f,
285
53.7M
          -0.307882213957909f,
286
53.7M
          -0.17478669754808135f,
287
53.7M
          0.3675398009862011f,
288
53.7M
          0.0,
289
53.7M
          0.4826689115059883f,
290
53.7M
          -0.08126111767175039f,
291
53.7M
          0.30788221395790305f,
292
53.7M
          -0.48266891150598584f,
293
53.7M
          0.0,
294
53.7M
      },
295
53.7M
      {
296
53.7M
          0.0,
297
53.7M
          0.0,
298
53.7M
          -0.21105601049335784f,
299
53.7M
          0.18567180916109802f,
300
53.7M
          0.0,
301
53.7M
          0.0,
302
53.7M
          0.49215859013738733f,
303
53.7M
          -0.38525013709251915f,
304
53.7M
          0.21105601049335806f,
305
53.7M
          -0.49215859013738905f,
306
53.7M
          0.0,
307
53.7M
          0.17419412659916217f,
308
53.7M
          -0.18567180916109904f,
309
53.7M
          0.3852501370925211f,
310
53.7M
          -0.1741941265991621f,
311
53.7M
          0.0,
312
53.7M
      },
313
53.7M
      {
314
53.7M
          0.0,
315
53.7M
          0.0,
316
53.7M
          -0.14266084808807264f,
317
53.7M
          -0.3416446842253372f,
318
53.7M
          0.0,
319
53.7M
          0.7367497537172237f,
320
53.7M
          0.24627107722075148f,
321
53.7M
          -0.08574019035519306f,
322
53.7M
          -0.14266084808807344f,
323
53.7M
          0.24627107722075137f,
324
53.7M
          0.14883399227113567f,
325
53.7M
          -0.04768680350229251f,
326
53.7M
          -0.3416446842253373f,
327
53.7M
          -0.08574019035519267f,
328
53.7M
          -0.047686803502292804f,
329
53.7M
          -0.14266084808807242f,
330
53.7M
      },
331
53.7M
      {
332
53.7M
          0.0,
333
53.7M
          0.0,
334
53.7M
          -0.13813540350758585f,
335
53.7M
          0.3302282550303788f,
336
53.7M
          0.0,
337
53.7M
          0.08755115000587084f,
338
53.7M
          -0.07946706605909573f,
339
53.7M
          -0.4613374887461511f,
340
53.7M
          -0.13813540350758294f,
341
53.7M
          -0.07946706605910261f,
342
53.7M
          0.49724647109535086f,
343
53.7M
          0.12538059448563663f,
344
53.7M
          0.3302282550303805f,
345
53.7M
          -0.4613374887461554f,
346
53.7M
          0.12538059448564315f,
347
53.7M
          -0.13813540350758452f,
348
53.7M
      },
349
53.7M
      {
350
53.7M
          0.0,
351
53.7M
          0.0,
352
53.7M
          -0.17437602599651067f,
353
53.7M
          0.0702790691196284f,
354
53.7M
          0.0,
355
53.7M
          -0.2921026642334881f,
356
53.7M
          0.3623817333531167f,
357
53.7M
          0.0,
358
53.7M
          -0.1743760259965108f,
359
53.7M
          0.36238173335311646f,
360
53.7M
          0.29210266423348785f,
361
53.7M
          -0.4326608024727445f,
362
53.7M
          0.07027906911962818f,
363
53.7M
          0.0,
364
53.7M
          -0.4326608024727457f,
365
53.7M
          0.34875205199302267f,
366
53.7M
      },
367
53.7M
      {
368
53.7M
          0.0,
369
53.7M
          0.0,
370
53.7M
          0.11354987314994337f,
371
53.7M
          -0.07417504595810355f,
372
53.7M
          0.0,
373
53.7M
          0.19402893032594343f,
374
53.7M
          -0.435190496523228f,
375
53.7M
          0.21918684838857466f,
376
53.7M
          0.11354987314994257f,
377
53.7M
          -0.4351904965232251f,
378
53.7M
          0.5550443808910661f,
379
53.7M
          -0.25468277124066463f,
380
53.7M
          -0.07417504595810233f,
381
53.7M
          0.2191868483885728f,
382
53.7M
          -0.25468277124066413f,
383
53.7M
          0.1135498731499429f,
384
53.7M
      },
385
53.7M
  };
386
387
53.7M
  const HWY_CAPPED(float, 16) d;
388
161M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
107M
    auto pixel = Zero(d);
390
1.82G
    for (size_t j = 0; j < 16; j++) {
391
1.72G
      auto cf = Set(d, coeffs[j]);
392
1.72G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.72G
      pixel = MulAdd(cf, basis, pixel);
394
1.72G
    }
395
107M
    Store(pixel, d, pixels + i);
396
107M
  }
397
53.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
52.8M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
52.8M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
52.8M
      {
98
52.8M
          0.25,
99
52.8M
          0.25,
100
52.8M
          0.25,
101
52.8M
          0.25,
102
52.8M
          0.25,
103
52.8M
          0.25,
104
52.8M
          0.25,
105
52.8M
          0.25,
106
52.8M
          0.25,
107
52.8M
          0.25,
108
52.8M
          0.25,
109
52.8M
          0.25,
110
52.8M
          0.25,
111
52.8M
          0.25,
112
52.8M
          0.25,
113
52.8M
          0.25,
114
52.8M
      },
115
52.8M
      {
116
52.8M
          0.876902929799142f,
117
52.8M
          0.2206518106944235f,
118
52.8M
          -0.10140050393753763f,
119
52.8M
          -0.1014005039375375f,
120
52.8M
          0.2206518106944236f,
121
52.8M
          -0.10140050393753777f,
122
52.8M
          -0.10140050393753772f,
123
52.8M
          -0.10140050393753763f,
124
52.8M
          -0.10140050393753758f,
125
52.8M
          -0.10140050393753769f,
126
52.8M
          -0.1014005039375375f,
127
52.8M
          -0.10140050393753768f,
128
52.8M
          -0.10140050393753768f,
129
52.8M
          -0.10140050393753759f,
130
52.8M
          -0.10140050393753763f,
131
52.8M
          -0.10140050393753741f,
132
52.8M
      },
133
52.8M
      {
134
52.8M
          0.0,
135
52.8M
          0.0,
136
52.8M
          0.40670075830260755f,
137
52.8M
          0.44444816619734445f,
138
52.8M
          0.0,
139
52.8M
          0.0,
140
52.8M
          0.19574399372042936f,
141
52.8M
          0.2929100136981264f,
142
52.8M
          -0.40670075830260716f,
143
52.8M
          -0.19574399372042872f,
144
52.8M
          0.0,
145
52.8M
          0.11379074460448091f,
146
52.8M
          -0.44444816619734384f,
147
52.8M
          -0.29291001369812636f,
148
52.8M
          -0.1137907446044814f,
149
52.8M
          0.0,
150
52.8M
      },
151
52.8M
      {
152
52.8M
          0.0,
153
52.8M
          0.0,
154
52.8M
          -0.21255748058288748f,
155
52.8M
          0.3085497062849767f,
156
52.8M
          0.0,
157
52.8M
          0.4706702258572536f,
158
52.8M
          -0.1621205195722993f,
159
52.8M
          0.0,
160
52.8M
          -0.21255748058287047f,
161
52.8M
          -0.16212051957228327f,
162
52.8M
          -0.47067022585725277f,
163
52.8M
          -0.1464291867126764f,
164
52.8M
          0.3085497062849487f,
165
52.8M
          0.0,
166
52.8M
          -0.14642918671266536f,
167
52.8M
          0.4251149611657548f,
168
52.8M
      },
169
52.8M
      {
170
52.8M
          0.0,
171
52.8M
          -0.7071067811865474f,
172
52.8M
          0.0,
173
52.8M
          0.0,
174
52.8M
          0.7071067811865476f,
175
52.8M
          0.0,
176
52.8M
          0.0,
177
52.8M
          0.0,
178
52.8M
          0.0,
179
52.8M
          0.0,
180
52.8M
          0.0,
181
52.8M
          0.0,
182
52.8M
          0.0,
183
52.8M
          0.0,
184
52.8M
          0.0,
185
52.8M
          0.0,
186
52.8M
      },
187
52.8M
      {
188
52.8M
          -0.4105377591765233f,
189
52.8M
          0.6235485373547691f,
190
52.8M
          -0.06435071657946274f,
191
52.8M
          -0.06435071657946266f,
192
52.8M
          0.6235485373547694f,
193
52.8M
          -0.06435071657946284f,
194
52.8M
          -0.0643507165794628f,
195
52.8M
          -0.06435071657946274f,
196
52.8M
          -0.06435071657946272f,
197
52.8M
          -0.06435071657946279f,
198
52.8M
          -0.06435071657946266f,
199
52.8M
          -0.06435071657946277f,
200
52.8M
          -0.06435071657946277f,
201
52.8M
          -0.06435071657946273f,
202
52.8M
          -0.06435071657946274f,
203
52.8M
          -0.0643507165794626f,
204
52.8M
      },
205
52.8M
      {
206
52.8M
          0.0,
207
52.8M
          0.0,
208
52.8M
          -0.4517556589999482f,
209
52.8M
          0.15854503551840063f,
210
52.8M
          0.0,
211
52.8M
          -0.04038515160822202f,
212
52.8M
          0.0074182263792423875f,
213
52.8M
          0.39351034269210167f,
214
52.8M
          -0.45175565899994635f,
215
52.8M
          0.007418226379244351f,
216
52.8M
          0.1107416575309343f,
217
52.8M
          0.08298163094882051f,
218
52.8M
          0.15854503551839705f,
219
52.8M
          0.3935103426921022f,
220
52.8M
          0.0829816309488214f,
221
52.8M
          -0.45175565899994796f,
222
52.8M
      },
223
52.8M
      {
224
52.8M
          0.0,
225
52.8M
          0.0,
226
52.8M
          -0.304684750724869f,
227
52.8M
          0.5112616136591823f,
228
52.8M
          0.0,
229
52.8M
          0.0,
230
52.8M
          -0.290480129728998f,
231
52.8M
          -0.06578701549142804f,
232
52.8M
          0.304684750724884f,
233
52.8M
          0.2904801297290076f,
234
52.8M
          0.0,
235
52.8M
          -0.23889773523344604f,
236
52.8M
          -0.5112616136592012f,
237
52.8M
          0.06578701549142545f,
238
52.8M
          0.23889773523345467f,
239
52.8M
          0.0,
240
52.8M
      },
241
52.8M
      {
242
52.8M
          0.0,
243
52.8M
          0.0,
244
52.8M
          0.3017929516615495f,
245
52.8M
          0.25792362796341184f,
246
52.8M
          0.0,
247
52.8M
          0.16272340142866204f,
248
52.8M
          0.09520022653475037f,
249
52.8M
          0.0,
250
52.8M
          0.3017929516615503f,
251
52.8M
          0.09520022653475055f,
252
52.8M
          -0.16272340142866173f,
253
52.8M
          -0.35312385449816297f,
254
52.8M
          0.25792362796341295f,
255
52.8M
          0.0,
256
52.8M
          -0.3531238544981624f,
257
52.8M
          -0.6035859033230976f,
258
52.8M
      },
259
52.8M
      {
260
52.8M
          0.0,
261
52.8M
          0.0,
262
52.8M
          0.40824829046386274f,
263
52.8M
          0.0,
264
52.8M
          0.0,
265
52.8M
          0.0,
266
52.8M
          0.0,
267
52.8M
          -0.4082482904638628f,
268
52.8M
          -0.4082482904638635f,
269
52.8M
          0.0,
270
52.8M
          0.0,
271
52.8M
          -0.40824829046386296f,
272
52.8M
          0.0,
273
52.8M
          0.4082482904638634f,
274
52.8M
          0.408248290463863f,
275
52.8M
          0.0,
276
52.8M
      },
277
52.8M
      {
278
52.8M
          0.0,
279
52.8M
          0.0,
280
52.8M
          0.1747866975480809f,
281
52.8M
          0.0812611176717539f,
282
52.8M
          0.0,
283
52.8M
          0.0,
284
52.8M
          -0.3675398009862027f,
285
52.8M
          -0.307882213957909f,
286
52.8M
          -0.17478669754808135f,
287
52.8M
          0.3675398009862011f,
288
52.8M
          0.0,
289
52.8M
          0.4826689115059883f,
290
52.8M
          -0.08126111767175039f,
291
52.8M
          0.30788221395790305f,
292
52.8M
          -0.48266891150598584f,
293
52.8M
          0.0,
294
52.8M
      },
295
52.8M
      {
296
52.8M
          0.0,
297
52.8M
          0.0,
298
52.8M
          -0.21105601049335784f,
299
52.8M
          0.18567180916109802f,
300
52.8M
          0.0,
301
52.8M
          0.0,
302
52.8M
          0.49215859013738733f,
303
52.8M
          -0.38525013709251915f,
304
52.8M
          0.21105601049335806f,
305
52.8M
          -0.49215859013738905f,
306
52.8M
          0.0,
307
52.8M
          0.17419412659916217f,
308
52.8M
          -0.18567180916109904f,
309
52.8M
          0.3852501370925211f,
310
52.8M
          -0.1741941265991621f,
311
52.8M
          0.0,
312
52.8M
      },
313
52.8M
      {
314
52.8M
          0.0,
315
52.8M
          0.0,
316
52.8M
          -0.14266084808807264f,
317
52.8M
          -0.3416446842253372f,
318
52.8M
          0.0,
319
52.8M
          0.7367497537172237f,
320
52.8M
          0.24627107722075148f,
321
52.8M
          -0.08574019035519306f,
322
52.8M
          -0.14266084808807344f,
323
52.8M
          0.24627107722075137f,
324
52.8M
          0.14883399227113567f,
325
52.8M
          -0.04768680350229251f,
326
52.8M
          -0.3416446842253373f,
327
52.8M
          -0.08574019035519267f,
328
52.8M
          -0.047686803502292804f,
329
52.8M
          -0.14266084808807242f,
330
52.8M
      },
331
52.8M
      {
332
52.8M
          0.0,
333
52.8M
          0.0,
334
52.8M
          -0.13813540350758585f,
335
52.8M
          0.3302282550303788f,
336
52.8M
          0.0,
337
52.8M
          0.08755115000587084f,
338
52.8M
          -0.07946706605909573f,
339
52.8M
          -0.4613374887461511f,
340
52.8M
          -0.13813540350758294f,
341
52.8M
          -0.07946706605910261f,
342
52.8M
          0.49724647109535086f,
343
52.8M
          0.12538059448563663f,
344
52.8M
          0.3302282550303805f,
345
52.8M
          -0.4613374887461554f,
346
52.8M
          0.12538059448564315f,
347
52.8M
          -0.13813540350758452f,
348
52.8M
      },
349
52.8M
      {
350
52.8M
          0.0,
351
52.8M
          0.0,
352
52.8M
          -0.17437602599651067f,
353
52.8M
          0.0702790691196284f,
354
52.8M
          0.0,
355
52.8M
          -0.2921026642334881f,
356
52.8M
          0.3623817333531167f,
357
52.8M
          0.0,
358
52.8M
          -0.1743760259965108f,
359
52.8M
          0.36238173335311646f,
360
52.8M
          0.29210266423348785f,
361
52.8M
          -0.4326608024727445f,
362
52.8M
          0.07027906911962818f,
363
52.8M
          0.0,
364
52.8M
          -0.4326608024727457f,
365
52.8M
          0.34875205199302267f,
366
52.8M
      },
367
52.8M
      {
368
52.8M
          0.0,
369
52.8M
          0.0,
370
52.8M
          0.11354987314994337f,
371
52.8M
          -0.07417504595810355f,
372
52.8M
          0.0,
373
52.8M
          0.19402893032594343f,
374
52.8M
          -0.435190496523228f,
375
52.8M
          0.21918684838857466f,
376
52.8M
          0.11354987314994257f,
377
52.8M
          -0.4351904965232251f,
378
52.8M
          0.5550443808910661f,
379
52.8M
          -0.25468277124066463f,
380
52.8M
          -0.07417504595810233f,
381
52.8M
          0.2191868483885728f,
382
52.8M
          -0.25468277124066413f,
383
52.8M
          0.1135498731499429f,
384
52.8M
      },
385
52.8M
  };
386
387
52.8M
  const HWY_CAPPED(float, 16) d;
388
158M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
105M
    auto pixel = Zero(d);
390
1.79G
    for (size_t j = 0; j < 16; j++) {
391
1.69G
      auto cf = Set(d, coeffs[j]);
392
1.69G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.69G
      pixel = MulAdd(cf, basis, pixel);
394
1.69G
    }
395
105M
    Store(pixel, d, pixels + i);
396
105M
  }
397
52.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
924k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
924k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
924k
      {
98
924k
          0.25,
99
924k
          0.25,
100
924k
          0.25,
101
924k
          0.25,
102
924k
          0.25,
103
924k
          0.25,
104
924k
          0.25,
105
924k
          0.25,
106
924k
          0.25,
107
924k
          0.25,
108
924k
          0.25,
109
924k
          0.25,
110
924k
          0.25,
111
924k
          0.25,
112
924k
          0.25,
113
924k
          0.25,
114
924k
      },
115
924k
      {
116
924k
          0.876902929799142f,
117
924k
          0.2206518106944235f,
118
924k
          -0.10140050393753763f,
119
924k
          -0.1014005039375375f,
120
924k
          0.2206518106944236f,
121
924k
          -0.10140050393753777f,
122
924k
          -0.10140050393753772f,
123
924k
          -0.10140050393753763f,
124
924k
          -0.10140050393753758f,
125
924k
          -0.10140050393753769f,
126
924k
          -0.1014005039375375f,
127
924k
          -0.10140050393753768f,
128
924k
          -0.10140050393753768f,
129
924k
          -0.10140050393753759f,
130
924k
          -0.10140050393753763f,
131
924k
          -0.10140050393753741f,
132
924k
      },
133
924k
      {
134
924k
          0.0,
135
924k
          0.0,
136
924k
          0.40670075830260755f,
137
924k
          0.44444816619734445f,
138
924k
          0.0,
139
924k
          0.0,
140
924k
          0.19574399372042936f,
141
924k
          0.2929100136981264f,
142
924k
          -0.40670075830260716f,
143
924k
          -0.19574399372042872f,
144
924k
          0.0,
145
924k
          0.11379074460448091f,
146
924k
          -0.44444816619734384f,
147
924k
          -0.29291001369812636f,
148
924k
          -0.1137907446044814f,
149
924k
          0.0,
150
924k
      },
151
924k
      {
152
924k
          0.0,
153
924k
          0.0,
154
924k
          -0.21255748058288748f,
155
924k
          0.3085497062849767f,
156
924k
          0.0,
157
924k
          0.4706702258572536f,
158
924k
          -0.1621205195722993f,
159
924k
          0.0,
160
924k
          -0.21255748058287047f,
161
924k
          -0.16212051957228327f,
162
924k
          -0.47067022585725277f,
163
924k
          -0.1464291867126764f,
164
924k
          0.3085497062849487f,
165
924k
          0.0,
166
924k
          -0.14642918671266536f,
167
924k
          0.4251149611657548f,
168
924k
      },
169
924k
      {
170
924k
          0.0,
171
924k
          -0.7071067811865474f,
172
924k
          0.0,
173
924k
          0.0,
174
924k
          0.7071067811865476f,
175
924k
          0.0,
176
924k
          0.0,
177
924k
          0.0,
178
924k
          0.0,
179
924k
          0.0,
180
924k
          0.0,
181
924k
          0.0,
182
924k
          0.0,
183
924k
          0.0,
184
924k
          0.0,
185
924k
          0.0,
186
924k
      },
187
924k
      {
188
924k
          -0.4105377591765233f,
189
924k
          0.6235485373547691f,
190
924k
          -0.06435071657946274f,
191
924k
          -0.06435071657946266f,
192
924k
          0.6235485373547694f,
193
924k
          -0.06435071657946284f,
194
924k
          -0.0643507165794628f,
195
924k
          -0.06435071657946274f,
196
924k
          -0.06435071657946272f,
197
924k
          -0.06435071657946279f,
198
924k
          -0.06435071657946266f,
199
924k
          -0.06435071657946277f,
200
924k
          -0.06435071657946277f,
201
924k
          -0.06435071657946273f,
202
924k
          -0.06435071657946274f,
203
924k
          -0.0643507165794626f,
204
924k
      },
205
924k
      {
206
924k
          0.0,
207
924k
          0.0,
208
924k
          -0.4517556589999482f,
209
924k
          0.15854503551840063f,
210
924k
          0.0,
211
924k
          -0.04038515160822202f,
212
924k
          0.0074182263792423875f,
213
924k
          0.39351034269210167f,
214
924k
          -0.45175565899994635f,
215
924k
          0.007418226379244351f,
216
924k
          0.1107416575309343f,
217
924k
          0.08298163094882051f,
218
924k
          0.15854503551839705f,
219
924k
          0.3935103426921022f,
220
924k
          0.0829816309488214f,
221
924k
          -0.45175565899994796f,
222
924k
      },
223
924k
      {
224
924k
          0.0,
225
924k
          0.0,
226
924k
          -0.304684750724869f,
227
924k
          0.5112616136591823f,
228
924k
          0.0,
229
924k
          0.0,
230
924k
          -0.290480129728998f,
231
924k
          -0.06578701549142804f,
232
924k
          0.304684750724884f,
233
924k
          0.2904801297290076f,
234
924k
          0.0,
235
924k
          -0.23889773523344604f,
236
924k
          -0.5112616136592012f,
237
924k
          0.06578701549142545f,
238
924k
          0.23889773523345467f,
239
924k
          0.0,
240
924k
      },
241
924k
      {
242
924k
          0.0,
243
924k
          0.0,
244
924k
          0.3017929516615495f,
245
924k
          0.25792362796341184f,
246
924k
          0.0,
247
924k
          0.16272340142866204f,
248
924k
          0.09520022653475037f,
249
924k
          0.0,
250
924k
          0.3017929516615503f,
251
924k
          0.09520022653475055f,
252
924k
          -0.16272340142866173f,
253
924k
          -0.35312385449816297f,
254
924k
          0.25792362796341295f,
255
924k
          0.0,
256
924k
          -0.3531238544981624f,
257
924k
          -0.6035859033230976f,
258
924k
      },
259
924k
      {
260
924k
          0.0,
261
924k
          0.0,
262
924k
          0.40824829046386274f,
263
924k
          0.0,
264
924k
          0.0,
265
924k
          0.0,
266
924k
          0.0,
267
924k
          -0.4082482904638628f,
268
924k
          -0.4082482904638635f,
269
924k
          0.0,
270
924k
          0.0,
271
924k
          -0.40824829046386296f,
272
924k
          0.0,
273
924k
          0.4082482904638634f,
274
924k
          0.408248290463863f,
275
924k
          0.0,
276
924k
      },
277
924k
      {
278
924k
          0.0,
279
924k
          0.0,
280
924k
          0.1747866975480809f,
281
924k
          0.0812611176717539f,
282
924k
          0.0,
283
924k
          0.0,
284
924k
          -0.3675398009862027f,
285
924k
          -0.307882213957909f,
286
924k
          -0.17478669754808135f,
287
924k
          0.3675398009862011f,
288
924k
          0.0,
289
924k
          0.4826689115059883f,
290
924k
          -0.08126111767175039f,
291
924k
          0.30788221395790305f,
292
924k
          -0.48266891150598584f,
293
924k
          0.0,
294
924k
      },
295
924k
      {
296
924k
          0.0,
297
924k
          0.0,
298
924k
          -0.21105601049335784f,
299
924k
          0.18567180916109802f,
300
924k
          0.0,
301
924k
          0.0,
302
924k
          0.49215859013738733f,
303
924k
          -0.38525013709251915f,
304
924k
          0.21105601049335806f,
305
924k
          -0.49215859013738905f,
306
924k
          0.0,
307
924k
          0.17419412659916217f,
308
924k
          -0.18567180916109904f,
309
924k
          0.3852501370925211f,
310
924k
          -0.1741941265991621f,
311
924k
          0.0,
312
924k
      },
313
924k
      {
314
924k
          0.0,
315
924k
          0.0,
316
924k
          -0.14266084808807264f,
317
924k
          -0.3416446842253372f,
318
924k
          0.0,
319
924k
          0.7367497537172237f,
320
924k
          0.24627107722075148f,
321
924k
          -0.08574019035519306f,
322
924k
          -0.14266084808807344f,
323
924k
          0.24627107722075137f,
324
924k
          0.14883399227113567f,
325
924k
          -0.04768680350229251f,
326
924k
          -0.3416446842253373f,
327
924k
          -0.08574019035519267f,
328
924k
          -0.047686803502292804f,
329
924k
          -0.14266084808807242f,
330
924k
      },
331
924k
      {
332
924k
          0.0,
333
924k
          0.0,
334
924k
          -0.13813540350758585f,
335
924k
          0.3302282550303788f,
336
924k
          0.0,
337
924k
          0.08755115000587084f,
338
924k
          -0.07946706605909573f,
339
924k
          -0.4613374887461511f,
340
924k
          -0.13813540350758294f,
341
924k
          -0.07946706605910261f,
342
924k
          0.49724647109535086f,
343
924k
          0.12538059448563663f,
344
924k
          0.3302282550303805f,
345
924k
          -0.4613374887461554f,
346
924k
          0.12538059448564315f,
347
924k
          -0.13813540350758452f,
348
924k
      },
349
924k
      {
350
924k
          0.0,
351
924k
          0.0,
352
924k
          -0.17437602599651067f,
353
924k
          0.0702790691196284f,
354
924k
          0.0,
355
924k
          -0.2921026642334881f,
356
924k
          0.3623817333531167f,
357
924k
          0.0,
358
924k
          -0.1743760259965108f,
359
924k
          0.36238173335311646f,
360
924k
          0.29210266423348785f,
361
924k
          -0.4326608024727445f,
362
924k
          0.07027906911962818f,
363
924k
          0.0,
364
924k
          -0.4326608024727457f,
365
924k
          0.34875205199302267f,
366
924k
      },
367
924k
      {
368
924k
          0.0,
369
924k
          0.0,
370
924k
          0.11354987314994337f,
371
924k
          -0.07417504595810355f,
372
924k
          0.0,
373
924k
          0.19402893032594343f,
374
924k
          -0.435190496523228f,
375
924k
          0.21918684838857466f,
376
924k
          0.11354987314994257f,
377
924k
          -0.4351904965232251f,
378
924k
          0.5550443808910661f,
379
924k
          -0.25468277124066463f,
380
924k
          -0.07417504595810233f,
381
924k
          0.2191868483885728f,
382
924k
          -0.25468277124066413f,
383
924k
          0.1135498731499429f,
384
924k
      },
385
924k
  };
386
387
924k
  const HWY_CAPPED(float, 16) d;
388
2.77M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.84M
    auto pixel = Zero(d);
390
31.4M
    for (size_t j = 0; j < 16; j++) {
391
29.5M
      auto cf = Set(d, coeffs[j]);
392
29.5M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
29.5M
      pixel = MulAdd(cf, basis, pixel);
394
29.5M
    }
395
1.84M
    Store(pixel, d, pixels + i);
396
1.84M
  }
397
924k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
53.7M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
53.7M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
53.7M
  size_t afv_x = afv_kind & 1;
404
53.7M
  size_t afv_y = afv_kind / 2;
405
53.7M
  float dcs[3] = {};
406
53.7M
  float block00 = coefficients[0];
407
53.7M
  float block01 = coefficients[1];
408
53.7M
  float block10 = coefficients[8];
409
53.7M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
53.7M
  dcs[1] = (block00 + block10 - block01);
411
53.7M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
53.7M
  HWY_ALIGN float coeff[4 * 4];
414
53.7M
  coeff[0] = dcs[0];
415
268M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.07G
    for (size_t ix = 0; ix < 4; ix++) {
417
860M
      if (ix == 0 && iy == 0) continue;
418
806M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
806M
    }
420
215M
  }
421
53.7M
  HWY_ALIGN float block[4 * 8];
422
53.7M
  AFVIDCT4x4(coeff, block);
423
268M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.07G
    for (size_t ix = 0; ix < 4; ix++) {
425
860M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
860M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
860M
    }
428
215M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
53.7M
  block[0] = dcs[1];
431
268M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.07G
    for (size_t ix = 0; ix < 4; ix++) {
433
860M
      if (ix == 0 && iy == 0) continue;
434
806M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
806M
    }
436
215M
  }
437
53.7M
  ComputeScaledIDCT<4, 4>()(
438
53.7M
      block,
439
53.7M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
53.7M
            pixels_stride),
441
53.7M
      scratch_space);
442
  // IDCT4x8.
443
53.7M
  block[0] = dcs[2];
444
268M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.93G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.72G
      if (ix == 0 && iy == 0) continue;
447
1.66G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.66G
    }
449
215M
  }
450
53.7M
  ComputeScaledIDCT<4, 8>()(
451
53.7M
      block,
452
53.7M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
53.7M
      scratch_space);
454
53.7M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.2M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.2M
  size_t afv_x = afv_kind & 1;
404
13.2M
  size_t afv_y = afv_kind / 2;
405
13.2M
  float dcs[3] = {};
406
13.2M
  float block00 = coefficients[0];
407
13.2M
  float block01 = coefficients[1];
408
13.2M
  float block10 = coefficients[8];
409
13.2M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.2M
  dcs[1] = (block00 + block10 - block01);
411
13.2M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.2M
  HWY_ALIGN float coeff[4 * 4];
414
13.2M
  coeff[0] = dcs[0];
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      if (ix == 0 && iy == 0) continue;
418
198M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
198M
    }
420
52.8M
  }
421
13.2M
  HWY_ALIGN float block[4 * 8];
422
13.2M
  AFVIDCT4x4(coeff, block);
423
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
264M
    for (size_t ix = 0; ix < 4; ix++) {
425
211M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
211M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
211M
    }
428
52.8M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.2M
  block[0] = dcs[1];
431
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
264M
    for (size_t ix = 0; ix < 4; ix++) {
433
211M
      if (ix == 0 && iy == 0) continue;
434
198M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
198M
    }
436
52.8M
  }
437
13.2M
  ComputeScaledIDCT<4, 4>()(
438
13.2M
      block,
439
13.2M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.2M
            pixels_stride),
441
13.2M
      scratch_space);
442
  // IDCT4x8.
443
13.2M
  block[0] = dcs[2];
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      if (ix == 0 && iy == 0) continue;
447
409M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
409M
    }
449
52.8M
  }
450
13.2M
  ComputeScaledIDCT<4, 8>()(
451
13.2M
      block,
452
13.2M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.2M
      scratch_space);
454
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.2M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.2M
  size_t afv_x = afv_kind & 1;
404
13.2M
  size_t afv_y = afv_kind / 2;
405
13.2M
  float dcs[3] = {};
406
13.2M
  float block00 = coefficients[0];
407
13.2M
  float block01 = coefficients[1];
408
13.2M
  float block10 = coefficients[8];
409
13.2M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.2M
  dcs[1] = (block00 + block10 - block01);
411
13.2M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.2M
  HWY_ALIGN float coeff[4 * 4];
414
13.2M
  coeff[0] = dcs[0];
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      if (ix == 0 && iy == 0) continue;
418
198M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
198M
    }
420
52.8M
  }
421
13.2M
  HWY_ALIGN float block[4 * 8];
422
13.2M
  AFVIDCT4x4(coeff, block);
423
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
264M
    for (size_t ix = 0; ix < 4; ix++) {
425
211M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
211M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
211M
    }
428
52.8M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.2M
  block[0] = dcs[1];
431
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
264M
    for (size_t ix = 0; ix < 4; ix++) {
433
211M
      if (ix == 0 && iy == 0) continue;
434
198M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
198M
    }
436
52.8M
  }
437
13.2M
  ComputeScaledIDCT<4, 4>()(
438
13.2M
      block,
439
13.2M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.2M
            pixels_stride),
441
13.2M
      scratch_space);
442
  // IDCT4x8.
443
13.2M
  block[0] = dcs[2];
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      if (ix == 0 && iy == 0) continue;
447
409M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
409M
    }
449
52.8M
  }
450
13.2M
  ComputeScaledIDCT<4, 8>()(
451
13.2M
      block,
452
13.2M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.2M
      scratch_space);
454
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.2M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.2M
  size_t afv_x = afv_kind & 1;
404
13.2M
  size_t afv_y = afv_kind / 2;
405
13.2M
  float dcs[3] = {};
406
13.2M
  float block00 = coefficients[0];
407
13.2M
  float block01 = coefficients[1];
408
13.2M
  float block10 = coefficients[8];
409
13.2M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.2M
  dcs[1] = (block00 + block10 - block01);
411
13.2M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.2M
  HWY_ALIGN float coeff[4 * 4];
414
13.2M
  coeff[0] = dcs[0];
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      if (ix == 0 && iy == 0) continue;
418
198M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
198M
    }
420
52.8M
  }
421
13.2M
  HWY_ALIGN float block[4 * 8];
422
13.2M
  AFVIDCT4x4(coeff, block);
423
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
264M
    for (size_t ix = 0; ix < 4; ix++) {
425
211M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
211M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
211M
    }
428
52.8M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.2M
  block[0] = dcs[1];
431
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
264M
    for (size_t ix = 0; ix < 4; ix++) {
433
211M
      if (ix == 0 && iy == 0) continue;
434
198M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
198M
    }
436
52.8M
  }
437
13.2M
  ComputeScaledIDCT<4, 4>()(
438
13.2M
      block,
439
13.2M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.2M
            pixels_stride),
441
13.2M
      scratch_space);
442
  // IDCT4x8.
443
13.2M
  block[0] = dcs[2];
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      if (ix == 0 && iy == 0) continue;
447
409M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
409M
    }
449
52.8M
  }
450
13.2M
  ComputeScaledIDCT<4, 8>()(
451
13.2M
      block,
452
13.2M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.2M
      scratch_space);
454
13.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.2M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.2M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.2M
  size_t afv_x = afv_kind & 1;
404
13.2M
  size_t afv_y = afv_kind / 2;
405
13.2M
  float dcs[3] = {};
406
13.2M
  float block00 = coefficients[0];
407
13.2M
  float block01 = coefficients[1];
408
13.2M
  float block10 = coefficients[8];
409
13.2M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.2M
  dcs[1] = (block00 + block10 - block01);
411
13.2M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.2M
  HWY_ALIGN float coeff[4 * 4];
414
13.2M
  coeff[0] = dcs[0];
415
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
264M
    for (size_t ix = 0; ix < 4; ix++) {
417
211M
      if (ix == 0 && iy == 0) continue;
418
198M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
198M
    }
420
52.8M
  }
421
13.2M
  HWY_ALIGN float block[4 * 8];
422
13.2M
  AFVIDCT4x4(coeff, block);
423
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
424
264M
    for (size_t ix = 0; ix < 4; ix++) {
425
211M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
211M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
211M
    }
428
52.8M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.2M
  block[0] = dcs[1];
431
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
432
264M
    for (size_t ix = 0; ix < 4; ix++) {
433
211M
      if (ix == 0 && iy == 0) continue;
434
198M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
198M
    }
436
52.8M
  }
437
13.2M
  ComputeScaledIDCT<4, 4>()(
438
13.2M
      block,
439
13.2M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.2M
            pixels_stride),
441
13.2M
      scratch_space);
442
  // IDCT4x8.
443
13.2M
  block[0] = dcs[2];
444
66.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
475M
    for (size_t ix = 0; ix < 8; ix++) {
446
422M
      if (ix == 0 && iy == 0) continue;
447
409M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
409M
    }
449
52.8M
  }
450
13.2M
  ComputeScaledIDCT<4, 8>()(
451
13.2M
      block,
452
13.2M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.2M
      scratch_space);
454
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
302k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
302k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
302k
  size_t afv_x = afv_kind & 1;
404
302k
  size_t afv_y = afv_kind / 2;
405
302k
  float dcs[3] = {};
406
302k
  float block00 = coefficients[0];
407
302k
  float block01 = coefficients[1];
408
302k
  float block10 = coefficients[8];
409
302k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
302k
  dcs[1] = (block00 + block10 - block01);
411
302k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
302k
  HWY_ALIGN float coeff[4 * 4];
414
302k
  coeff[0] = dcs[0];
415
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.83M
      if (ix == 0 && iy == 0) continue;
418
4.53M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.53M
    }
420
1.20M
  }
421
302k
  HWY_ALIGN float block[4 * 8];
422
302k
  AFVIDCT4x4(coeff, block);
423
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
424
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
425
4.83M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
4.83M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
4.83M
    }
428
1.20M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
302k
  block[0] = dcs[1];
431
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
432
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
433
4.83M
      if (ix == 0 && iy == 0) continue;
434
4.53M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.53M
    }
436
1.20M
  }
437
302k
  ComputeScaledIDCT<4, 4>()(
438
302k
      block,
439
302k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
302k
            pixels_stride),
441
302k
      scratch_space);
442
  // IDCT4x8.
443
302k
  block[0] = dcs[2];
444
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.8M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.67M
      if (ix == 0 && iy == 0) continue;
447
9.36M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
9.36M
    }
449
1.20M
  }
450
302k
  ComputeScaledIDCT<4, 8>()(
451
302k
      block,
452
302k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
302k
      scratch_space);
454
302k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
176k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
176k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
176k
  size_t afv_x = afv_kind & 1;
404
176k
  size_t afv_y = afv_kind / 2;
405
176k
  float dcs[3] = {};
406
176k
  float block00 = coefficients[0];
407
176k
  float block01 = coefficients[1];
408
176k
  float block10 = coefficients[8];
409
176k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
176k
  dcs[1] = (block00 + block10 - block01);
411
176k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
176k
  HWY_ALIGN float coeff[4 * 4];
414
176k
  coeff[0] = dcs[0];
415
881k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.52M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.81M
      if (ix == 0 && iy == 0) continue;
418
2.64M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.64M
    }
420
704k
  }
421
176k
  HWY_ALIGN float block[4 * 8];
422
176k
  AFVIDCT4x4(coeff, block);
423
881k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.52M
    for (size_t ix = 0; ix < 4; ix++) {
425
2.81M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
2.81M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
2.81M
    }
428
704k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
176k
  block[0] = dcs[1];
431
881k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.52M
    for (size_t ix = 0; ix < 4; ix++) {
433
2.81M
      if (ix == 0 && iy == 0) continue;
434
2.64M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.64M
    }
436
704k
  }
437
176k
  ComputeScaledIDCT<4, 4>()(
438
176k
      block,
439
176k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
176k
            pixels_stride),
441
176k
      scratch_space);
442
  // IDCT4x8.
443
176k
  block[0] = dcs[2];
444
881k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.63M
      if (ix == 0 && iy == 0) continue;
447
5.46M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
5.46M
    }
449
704k
  }
450
176k
  ComputeScaledIDCT<4, 8>()(
451
176k
      block,
452
176k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
176k
      scratch_space);
454
176k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
214k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
214k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
214k
  size_t afv_x = afv_kind & 1;
404
214k
  size_t afv_y = afv_kind / 2;
405
214k
  float dcs[3] = {};
406
214k
  float block00 = coefficients[0];
407
214k
  float block01 = coefficients[1];
408
214k
  float block10 = coefficients[8];
409
214k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
214k
  dcs[1] = (block00 + block10 - block01);
411
214k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
214k
  HWY_ALIGN float coeff[4 * 4];
414
214k
  coeff[0] = dcs[0];
415
1.07M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.28M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.42M
      if (ix == 0 && iy == 0) continue;
418
3.21M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.21M
    }
420
856k
  }
421
214k
  HWY_ALIGN float block[4 * 8];
422
214k
  AFVIDCT4x4(coeff, block);
423
1.07M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.28M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.42M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.42M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.42M
    }
428
856k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
214k
  block[0] = dcs[1];
431
1.07M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.28M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.42M
      if (ix == 0 && iy == 0) continue;
434
3.21M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.21M
    }
436
856k
  }
437
214k
  ComputeScaledIDCT<4, 4>()(
438
214k
      block,
439
214k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
214k
            pixels_stride),
441
214k
      scratch_space);
442
  // IDCT4x8.
443
214k
  block[0] = dcs[2];
444
1.07M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.70M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.84M
      if (ix == 0 && iy == 0) continue;
447
6.63M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.63M
    }
449
856k
  }
450
214k
  ComputeScaledIDCT<4, 8>()(
451
214k
      block,
452
214k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
214k
      scratch_space);
454
214k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
231k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
231k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
231k
  size_t afv_x = afv_kind & 1;
404
231k
  size_t afv_y = afv_kind / 2;
405
231k
  float dcs[3] = {};
406
231k
  float block00 = coefficients[0];
407
231k
  float block01 = coefficients[1];
408
231k
  float block10 = coefficients[8];
409
231k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
231k
  dcs[1] = (block00 + block10 - block01);
411
231k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
231k
  HWY_ALIGN float coeff[4 * 4];
414
231k
  coeff[0] = dcs[0];
415
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.63M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.70M
      if (ix == 0 && iy == 0) continue;
418
3.47M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.47M
    }
420
927k
  }
421
231k
  HWY_ALIGN float block[4 * 8];
422
231k
  AFVIDCT4x4(coeff, block);
423
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.63M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.70M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.70M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.70M
    }
428
927k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
231k
  block[0] = dcs[1];
431
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.63M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.70M
      if (ix == 0 && iy == 0) continue;
434
3.47M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.47M
    }
436
927k
  }
437
231k
  ComputeScaledIDCT<4, 4>()(
438
231k
      block,
439
231k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
231k
            pixels_stride),
441
231k
      scratch_space);
442
  // IDCT4x8.
443
231k
  block[0] = dcs[2];
444
1.15M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.41M
      if (ix == 0 && iy == 0) continue;
447
7.18M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.18M
    }
449
927k
  }
450
231k
  ComputeScaledIDCT<4, 8>()(
451
231k
      block,
452
231k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
231k
      scratch_space);
454
231k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
183M
                                        float* scratch_space) {
461
183M
  using Type = AcStrategyType;
462
183M
  switch (strategy) {
463
16.5M
    case Type::IDENTITY: {
464
16.5M
      float dcs[4] = {};
465
16.5M
      float block00 = coefficients[0];
466
16.5M
      float block01 = coefficients[1];
467
16.5M
      float block10 = coefficients[8];
468
16.5M
      float block11 = coefficients[9];
469
16.5M
      dcs[0] = block00 + block01 + block10 + block11;
470
16.5M
      dcs[1] = block00 + block01 - block10 - block11;
471
16.5M
      dcs[2] = block00 - block01 + block10 - block11;
472
16.5M
      dcs[3] = block00 - block01 - block10 + block11;
473
49.7M
      for (size_t y = 0; y < 2; y++) {
474
99.4M
        for (size_t x = 0; x < 2; x++) {
475
66.2M
          float block_dc = dcs[y * 2 + x];
476
66.2M
          float residual_sum = 0;
477
331M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.32G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.06G
              if (ix == 0 && iy == 0) continue;
480
994M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
994M
            }
482
265M
          }
483
66.2M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
66.2M
              block_dc - residual_sum * (1.0f / 16);
485
331M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.32G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.06G
              if (ix == 1 && iy == 1) continue;
488
994M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
994M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
994M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
994M
            }
492
265M
          }
493
66.2M
          pixels[y * 4 * pixels_stride + x * 4] =
494
66.2M
              coefficients[(y + 2) * 8 + x + 2] +
495
66.2M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
66.2M
        }
497
33.1M
      }
498
16.5M
      break;
499
0
    }
500
13.6M
    case Type::DCT8X4: {
501
13.6M
      float dcs[2] = {};
502
13.6M
      float block0 = coefficients[0];
503
13.6M
      float block1 = coefficients[8];
504
13.6M
      dcs[0] = block0 + block1;
505
13.6M
      dcs[1] = block0 - block1;
506
40.9M
      for (size_t x = 0; x < 2; x++) {
507
27.3M
        HWY_ALIGN float block[4 * 8];
508
27.3M
        block[0] = dcs[x];
509
136M
        for (size_t iy = 0; iy < 4; iy++) {
510
983M
          for (size_t ix = 0; ix < 8; ix++) {
511
873M
            if (ix == 0 && iy == 0) continue;
512
846M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
846M
          }
514
109M
        }
515
27.3M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
27.3M
                                  scratch_space);
517
27.3M
      }
518
13.6M
      break;
519
0
    }
520
13.4M
    case Type::DCT4X8: {
521
13.4M
      float dcs[2] = {};
522
13.4M
      float block0 = coefficients[0];
523
13.4M
      float block1 = coefficients[8];
524
13.4M
      dcs[0] = block0 + block1;
525
13.4M
      dcs[1] = block0 - block1;
526
40.2M
      for (size_t y = 0; y < 2; y++) {
527
26.8M
        HWY_ALIGN float block[4 * 8];
528
26.8M
        block[0] = dcs[y];
529
134M
        for (size_t iy = 0; iy < 4; iy++) {
530
966M
          for (size_t ix = 0; ix < 8; ix++) {
531
858M
            if (ix == 0 && iy == 0) continue;
532
831M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
831M
          }
534
107M
        }
535
26.8M
        ComputeScaledIDCT<4, 8>()(
536
26.8M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
26.8M
            scratch_space);
538
26.8M
      }
539
13.4M
      break;
540
0
    }
541
13.2M
    case Type::DCT4X4: {
542
13.2M
      float dcs[4] = {};
543
13.2M
      float block00 = coefficients[0];
544
13.2M
      float block01 = coefficients[1];
545
13.2M
      float block10 = coefficients[8];
546
13.2M
      float block11 = coefficients[9];
547
13.2M
      dcs[0] = block00 + block01 + block10 + block11;
548
13.2M
      dcs[1] = block00 + block01 - block10 - block11;
549
13.2M
      dcs[2] = block00 - block01 + block10 - block11;
550
13.2M
      dcs[3] = block00 - block01 - block10 + block11;
551
39.6M
      for (size_t y = 0; y < 2; y++) {
552
79.2M
        for (size_t x = 0; x < 2; x++) {
553
52.8M
          HWY_ALIGN float block[4 * 4];
554
52.8M
          block[0] = dcs[y * 2 + x];
555
264M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
557
845M
              if (ix == 0 && iy == 0) continue;
558
792M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
792M
            }
560
211M
          }
561
52.8M
          ComputeScaledIDCT<4, 4>()(
562
52.8M
              block,
563
52.8M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
52.8M
              scratch_space);
565
52.8M
        }
566
26.4M
      }
567
13.2M
      break;
568
0
    }
569
19.4M
    case Type::DCT2X2: {
570
19.4M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
19.4M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
19.4M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
19.4M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
19.4M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
174M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.39G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.24G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.24G
        }
579
155M
      }
580
19.4M
      break;
581
0
    }
582
5.79M
    case Type::DCT16X16: {
583
5.79M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.79M
                                  scratch_space);
585
5.79M
      break;
586
0
    }
587
11.1M
    case Type::DCT16X8: {
588
11.1M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
11.1M
                                 scratch_space);
590
11.1M
      break;
591
0
    }
592
11.1M
    case Type::DCT8X16: {
593
11.1M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
11.1M
                                 scratch_space);
595
11.1M
      break;
596
0
    }
597
45
    case Type::DCT32X8: {
598
45
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
45
                                 scratch_space);
600
45
      break;
601
0
    }
602
144
    case Type::DCT8X32: {
603
144
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
144
                                 scratch_space);
605
144
      break;
606
0
    }
607
2.24M
    case Type::DCT32X16: {
608
2.24M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.24M
                                  scratch_space);
610
2.24M
      break;
611
0
    }
612
2.23M
    case Type::DCT16X32: {
613
2.23M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.23M
                                  scratch_space);
615
2.23M
      break;
616
0
    }
617
1.34M
    case Type::DCT32X32: {
618
1.34M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.34M
                                  scratch_space);
620
1.34M
      break;
621
0
    }
622
18.3M
    case Type::DCT: {
623
18.3M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
18.3M
                                scratch_space);
625
18.3M
      break;
626
0
    }
627
13.5M
    case Type::AFV0: {
628
13.5M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
13.5M
      break;
630
0
    }
631
13.3M
    case Type::AFV1: {
632
13.3M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
13.3M
      break;
634
0
    }
635
13.4M
    case Type::AFV2: {
636
13.4M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
13.4M
      break;
638
0
    }
639
13.4M
    case Type::AFV3: {
640
13.4M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
13.4M
      break;
642
0
    }
643
656k
    case Type::DCT64X32: {
644
656k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
656k
                                  scratch_space);
646
656k
      break;
647
0
    }
648
404k
    case Type::DCT32X64: {
649
404k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
404k
                                  scratch_space);
651
404k
      break;
652
0
    }
653
349k
    case Type::DCT64X64: {
654
349k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
349k
                                  scratch_space);
656
349k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
24
    case Type::DCT128X128: {
669
24
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
24
                                    scratch_space);
671
24
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
183M
  }
689
183M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
164M
                                        float* scratch_space) {
461
164M
  using Type = AcStrategyType;
462
164M
  switch (strategy) {
463
13.2M
    case Type::IDENTITY: {
464
13.2M
      float dcs[4] = {};
465
13.2M
      float block00 = coefficients[0];
466
13.2M
      float block01 = coefficients[1];
467
13.2M
      float block10 = coefficients[8];
468
13.2M
      float block11 = coefficients[9];
469
13.2M
      dcs[0] = block00 + block01 + block10 + block11;
470
13.2M
      dcs[1] = block00 + block01 - block10 - block11;
471
13.2M
      dcs[2] = block00 - block01 + block10 - block11;
472
13.2M
      dcs[3] = block00 - block01 - block10 + block11;
473
39.6M
      for (size_t y = 0; y < 2; y++) {
474
79.2M
        for (size_t x = 0; x < 2; x++) {
475
52.8M
          float block_dc = dcs[y * 2 + x];
476
52.8M
          float residual_sum = 0;
477
264M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
479
845M
              if (ix == 0 && iy == 0) continue;
480
792M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
792M
            }
482
211M
          }
483
52.8M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
52.8M
              block_dc - residual_sum * (1.0f / 16);
485
264M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
487
845M
              if (ix == 1 && iy == 1) continue;
488
792M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
792M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
792M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
792M
            }
492
211M
          }
493
52.8M
          pixels[y * 4 * pixels_stride + x * 4] =
494
52.8M
              coefficients[(y + 2) * 8 + x + 2] +
495
52.8M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
52.8M
        }
497
26.4M
      }
498
13.2M
      break;
499
0
    }
500
13.2M
    case Type::DCT8X4: {
501
13.2M
      float dcs[2] = {};
502
13.2M
      float block0 = coefficients[0];
503
13.2M
      float block1 = coefficients[8];
504
13.2M
      dcs[0] = block0 + block1;
505
13.2M
      dcs[1] = block0 - block1;
506
39.6M
      for (size_t x = 0; x < 2; x++) {
507
26.4M
        HWY_ALIGN float block[4 * 8];
508
26.4M
        block[0] = dcs[x];
509
132M
        for (size_t iy = 0; iy < 4; iy++) {
510
950M
          for (size_t ix = 0; ix < 8; ix++) {
511
845M
            if (ix == 0 && iy == 0) continue;
512
818M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
818M
          }
514
105M
        }
515
26.4M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
26.4M
                                  scratch_space);
517
26.4M
      }
518
13.2M
      break;
519
0
    }
520
13.2M
    case Type::DCT4X8: {
521
13.2M
      float dcs[2] = {};
522
13.2M
      float block0 = coefficients[0];
523
13.2M
      float block1 = coefficients[8];
524
13.2M
      dcs[0] = block0 + block1;
525
13.2M
      dcs[1] = block0 - block1;
526
39.6M
      for (size_t y = 0; y < 2; y++) {
527
26.4M
        HWY_ALIGN float block[4 * 8];
528
26.4M
        block[0] = dcs[y];
529
132M
        for (size_t iy = 0; iy < 4; iy++) {
530
950M
          for (size_t ix = 0; ix < 8; ix++) {
531
845M
            if (ix == 0 && iy == 0) continue;
532
818M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
818M
          }
534
105M
        }
535
26.4M
        ComputeScaledIDCT<4, 8>()(
536
26.4M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
26.4M
            scratch_space);
538
26.4M
      }
539
13.2M
      break;
540
0
    }
541
13.2M
    case Type::DCT4X4: {
542
13.2M
      float dcs[4] = {};
543
13.2M
      float block00 = coefficients[0];
544
13.2M
      float block01 = coefficients[1];
545
13.2M
      float block10 = coefficients[8];
546
13.2M
      float block11 = coefficients[9];
547
13.2M
      dcs[0] = block00 + block01 + block10 + block11;
548
13.2M
      dcs[1] = block00 + block01 - block10 - block11;
549
13.2M
      dcs[2] = block00 - block01 + block10 - block11;
550
13.2M
      dcs[3] = block00 - block01 - block10 + block11;
551
39.6M
      for (size_t y = 0; y < 2; y++) {
552
79.2M
        for (size_t x = 0; x < 2; x++) {
553
52.8M
          HWY_ALIGN float block[4 * 4];
554
52.8M
          block[0] = dcs[y * 2 + x];
555
264M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.05G
            for (size_t ix = 0; ix < 4; ix++) {
557
845M
              if (ix == 0 && iy == 0) continue;
558
792M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
792M
            }
560
211M
          }
561
52.8M
          ComputeScaledIDCT<4, 4>()(
562
52.8M
              block,
563
52.8M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
52.8M
              scratch_space);
565
52.8M
        }
566
26.4M
      }
567
13.2M
      break;
568
0
    }
569
13.2M
    case Type::DCT2X2: {
570
13.2M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
13.2M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
13.2M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
13.2M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
13.2M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
118M
      for (size_t y = 0; y < kBlockDim; y++) {
576
950M
        for (size_t x = 0; x < kBlockDim; x++) {
577
845M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
845M
        }
579
105M
      }
580
13.2M
      break;
581
0
    }
582
5.27M
    case Type::DCT16X16: {
583
5.27M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.27M
                                  scratch_space);
585
5.27M
      break;
586
0
    }
587
10.4M
    case Type::DCT16X8: {
588
10.4M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
10.4M
                                 scratch_space);
590
10.4M
      break;
591
0
    }
592
10.4M
    case Type::DCT8X16: {
593
10.4M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
10.4M
                                 scratch_space);
595
10.4M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
2.08M
    case Type::DCT32X16: {
608
2.08M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.08M
                                  scratch_space);
610
2.08M
      break;
611
0
    }
612
2.06M
    case Type::DCT16X32: {
613
2.06M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.06M
                                  scratch_space);
615
2.06M
      break;
616
0
    }
617
1.05M
    case Type::DCT32X32: {
618
1.05M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.05M
                                  scratch_space);
620
1.05M
      break;
621
0
    }
622
13.2M
    case Type::DCT: {
623
13.2M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
13.2M
                                scratch_space);
625
13.2M
      break;
626
0
    }
627
13.2M
    case Type::AFV0: {
628
13.2M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
13.2M
      break;
630
0
    }
631
13.2M
    case Type::AFV1: {
632
13.2M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
13.2M
      break;
634
0
    }
635
13.2M
    case Type::AFV2: {
636
13.2M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
13.2M
      break;
638
0
    }
639
13.2M
    case Type::AFV3: {
640
13.2M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
13.2M
      break;
642
0
    }
643
633k
    case Type::DCT64X32: {
644
633k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
633k
                                  scratch_space);
646
633k
      break;
647
0
    }
648
384k
    case Type::DCT32X64: {
649
384k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
384k
                                  scratch_space);
651
384k
      break;
652
0
    }
653
182k
    case Type::DCT64X64: {
654
182k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
182k
                                  scratch_space);
656
182k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
164M
  }
689
164M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
19.1M
                                        float* scratch_space) {
461
19.1M
  using Type = AcStrategyType;
462
19.1M
  switch (strategy) {
463
3.36M
    case Type::IDENTITY: {
464
3.36M
      float dcs[4] = {};
465
3.36M
      float block00 = coefficients[0];
466
3.36M
      float block01 = coefficients[1];
467
3.36M
      float block10 = coefficients[8];
468
3.36M
      float block11 = coefficients[9];
469
3.36M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.36M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.36M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.36M
      dcs[3] = block00 - block01 - block10 + block11;
473
10.0M
      for (size_t y = 0; y < 2; y++) {
474
20.1M
        for (size_t x = 0; x < 2; x++) {
475
13.4M
          float block_dc = dcs[y * 2 + x];
476
13.4M
          float residual_sum = 0;
477
67.2M
          for (size_t iy = 0; iy < 4; iy++) {
478
268M
            for (size_t ix = 0; ix < 4; ix++) {
479
215M
              if (ix == 0 && iy == 0) continue;
480
201M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
201M
            }
482
53.7M
          }
483
13.4M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
13.4M
              block_dc - residual_sum * (1.0f / 16);
485
67.2M
          for (size_t iy = 0; iy < 4; iy++) {
486
268M
            for (size_t ix = 0; ix < 4; ix++) {
487
215M
              if (ix == 1 && iy == 1) continue;
488
201M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
201M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
201M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
201M
            }
492
53.7M
          }
493
13.4M
          pixels[y * 4 * pixels_stride + x * 4] =
494
13.4M
              coefficients[(y + 2) * 8 + x + 2] +
495
13.4M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
13.4M
        }
497
6.72M
      }
498
3.36M
      break;
499
0
    }
500
447k
    case Type::DCT8X4: {
501
447k
      float dcs[2] = {};
502
447k
      float block0 = coefficients[0];
503
447k
      float block1 = coefficients[8];
504
447k
      dcs[0] = block0 + block1;
505
447k
      dcs[1] = block0 - block1;
506
1.34M
      for (size_t x = 0; x < 2; x++) {
507
895k
        HWY_ALIGN float block[4 * 8];
508
895k
        block[0] = dcs[x];
509
4.47M
        for (size_t iy = 0; iy < 4; iy++) {
510
32.2M
          for (size_t ix = 0; ix < 8; ix++) {
511
28.6M
            if (ix == 0 && iy == 0) continue;
512
27.7M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
27.7M
          }
514
3.58M
        }
515
895k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
895k
                                  scratch_space);
517
895k
      }
518
447k
      break;
519
0
    }
520
211k
    case Type::DCT4X8: {
521
211k
      float dcs[2] = {};
522
211k
      float block0 = coefficients[0];
523
211k
      float block1 = coefficients[8];
524
211k
      dcs[0] = block0 + block1;
525
211k
      dcs[1] = block0 - block1;
526
633k
      for (size_t y = 0; y < 2; y++) {
527
422k
        HWY_ALIGN float block[4 * 8];
528
422k
        block[0] = dcs[y];
529
2.11M
        for (size_t iy = 0; iy < 4; iy++) {
530
15.2M
          for (size_t ix = 0; ix < 8; ix++) {
531
13.5M
            if (ix == 0 && iy == 0) continue;
532
13.0M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
13.0M
          }
534
1.68M
        }
535
422k
        ComputeScaledIDCT<4, 8>()(
536
422k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
422k
            scratch_space);
538
422k
      }
539
211k
      break;
540
0
    }
541
4.33k
    case Type::DCT4X4: {
542
4.33k
      float dcs[4] = {};
543
4.33k
      float block00 = coefficients[0];
544
4.33k
      float block01 = coefficients[1];
545
4.33k
      float block10 = coefficients[8];
546
4.33k
      float block11 = coefficients[9];
547
4.33k
      dcs[0] = block00 + block01 + block10 + block11;
548
4.33k
      dcs[1] = block00 + block01 - block10 - block11;
549
4.33k
      dcs[2] = block00 - block01 + block10 - block11;
550
4.33k
      dcs[3] = block00 - block01 - block10 + block11;
551
13.0k
      for (size_t y = 0; y < 2; y++) {
552
26.0k
        for (size_t x = 0; x < 2; x++) {
553
17.3k
          HWY_ALIGN float block[4 * 4];
554
17.3k
          block[0] = dcs[y * 2 + x];
555
86.7k
          for (size_t iy = 0; iy < 4; iy++) {
556
347k
            for (size_t ix = 0; ix < 4; ix++) {
557
277k
              if (ix == 0 && iy == 0) continue;
558
260k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
260k
            }
560
69.4k
          }
561
17.3k
          ComputeScaledIDCT<4, 4>()(
562
17.3k
              block,
563
17.3k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
17.3k
              scratch_space);
565
17.3k
        }
566
8.67k
      }
567
4.33k
      break;
568
0
    }
569
6.21M
    case Type::DCT2X2: {
570
6.21M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
6.21M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
6.21M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
6.21M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
6.21M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
55.9M
      for (size_t y = 0; y < kBlockDim; y++) {
576
447M
        for (size_t x = 0; x < kBlockDim; x++) {
577
397M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
397M
        }
579
49.7M
      }
580
6.21M
      break;
581
0
    }
582
517k
    case Type::DCT16X16: {
583
517k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
517k
                                  scratch_space);
585
517k
      break;
586
0
    }
587
704k
    case Type::DCT16X8: {
588
704k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
704k
                                 scratch_space);
590
704k
      break;
591
0
    }
592
757k
    case Type::DCT8X16: {
593
757k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
757k
                                 scratch_space);
595
757k
      break;
596
0
    }
597
45
    case Type::DCT32X8: {
598
45
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
45
                                 scratch_space);
600
45
      break;
601
0
    }
602
144
    case Type::DCT8X32: {
603
144
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
144
                                 scratch_space);
605
144
      break;
606
0
    }
607
164k
    case Type::DCT32X16: {
608
164k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
164k
                                  scratch_space);
610
164k
      break;
611
0
    }
612
172k
    case Type::DCT16X32: {
613
172k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
172k
                                  scratch_space);
615
172k
      break;
616
0
    }
617
295k
    case Type::DCT32X32: {
618
295k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
295k
                                  scratch_space);
620
295k
      break;
621
0
    }
622
5.12M
    case Type::DCT: {
623
5.12M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
5.12M
                                scratch_space);
625
5.12M
      break;
626
0
    }
627
302k
    case Type::AFV0: {
628
302k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
302k
      break;
630
0
    }
631
176k
    case Type::AFV1: {
632
176k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
176k
      break;
634
0
    }
635
214k
    case Type::AFV2: {
636
214k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
214k
      break;
638
0
    }
639
231k
    case Type::AFV3: {
640
231k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
231k
      break;
642
0
    }
643
22.4k
    case Type::DCT64X32: {
644
22.4k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
22.4k
                                  scratch_space);
646
22.4k
      break;
647
0
    }
648
19.0k
    case Type::DCT32X64: {
649
19.0k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
19.0k
                                  scratch_space);
651
19.0k
      break;
652
0
    }
653
166k
    case Type::DCT64X64: {
654
166k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
166k
                                  scratch_space);
656
166k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
3
    case Type::DCT64X128: {
664
3
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
3
                                   scratch_space);
666
3
      break;
667
0
    }
668
24
    case Type::DCT128X128: {
669
24
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
24
                                    scratch_space);
671
24
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
19.1M
  }
689
19.1M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
// Fills `llf` with the lowest-frequency coefficients selected by `strategy`,
// computed from the values in `dc`.
//
//  * Multi-block strategies (DCT16X8 ... DCT256X256): calls ReinterpretingDCT
//    on a ROWS x COLS grid read from `dc` with stride `dc_stride`, where
//    DCT_ROWS == ROWS * kBlockDim and DCT_COLS == COLS * kBlockDim. The output
//    stride passed for `llf` is max(ROWS, COLS) * kBlockDim in every case.
//  * All other strategies (DCT, DCT2X2, DCT4X4, DCT4X8, DCT8X4, AFV0..AFV3,
//    IDENTITY): copies dc[0] to llf[0].
//
// `scratch` is only touched when ROWS * COLS exceeds 4 * 4 (DCT64X32 and
// larger): its first ROWS * COLS floats are used as the `block` argument and
// the pointer just past them is forwarded as the scratch space. Smaller grids
// use the function-local warm_block / warm_scratch_space buffers instead.
// NOTE(review): how much room `scratch` needs beyond ROWS * COLS floats is
// determined by ComputeScaledDCT, which is not visible here - confirm against
// the callers' allocation.
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
19.4M
                                              float* JXL_RESTRICT scratch) {
695
19.4M
  using Type = AcStrategyType;
696
19.4M
  // Local buffers for grids of at most 4 x 4 entries; the scratch buffer is
  // four times the size of the block buffer.
  HWY_ALIGN float warm_block[4 * 4];
697
19.4M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
19.4M
  switch (strategy) {
699
704k
    // Grids up to 4 x 4: served entirely from the local warm_* buffers.
    case Type::DCT16X8: {
700
704k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
704k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
704k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
704k
      break;
704
0
    }
705
757k
    case Type::DCT8X16: {
706
757k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
757k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
757k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
757k
      break;
710
0
    }
711
517k
    case Type::DCT16X16: {
712
517k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
517k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
517k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
517k
      break;
716
0
    }
717
45
    case Type::DCT32X8: {
718
45
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
45
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
45
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
45
      break;
722
0
    }
723
144
    case Type::DCT8X32: {
724
144
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
144
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
144
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
144
      break;
728
0
    }
729
164k
    case Type::DCT32X16: {
730
164k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
164k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
164k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
164k
      break;
734
0
    }
735
172k
    case Type::DCT16X32: {
736
172k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
172k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
172k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
172k
      break;
740
0
    }
741
295k
    case Type::DCT32X32: {
742
295k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
295k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
295k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
295k
      break;
746
0
    }
747
22.4k
    // From here on ROWS * COLS no longer fits in warm_block, so the
    // caller-provided `scratch` is split: [0, ROWS * COLS) is the block and
    // scratch + ROWS * COLS is passed on as the scratch space.
    case Type::DCT64X32: {
748
22.4k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
22.4k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
22.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
22.4k
      break;
752
0
    }
753
19.0k
    case Type::DCT32X64: {
754
19.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.0k
      break;
758
0
    }
759
166k
    case Type::DCT64X64: {
760
166k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
166k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
166k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
166k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
24
    case Type::DCT128X128: {
778
24
      ReinterpretingDCT<
779
24
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
24
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
24
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
24
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.15M
    // Remaining strategies share one body: the single value dc[0] is copied
    // through to llf[0] unchanged; no buffers are used.
    case Type::DCT:
806
11.3M
    case Type::DCT2X2:
807
11.3M
    case Type::DCT4X4:
808
11.5M
    case Type::DCT4X8:
809
12.0M
    case Type::DCT8X4:
810
12.3M
    case Type::AFV0:
811
12.5M
    case Type::AFV1:
812
12.7M
    case Type::AFV2:
813
12.9M
    case Type::AFV3:
814
16.6M
    case Type::IDENTITY:
815
16.6M
      llf[0] = dc[0];
816
16.6M
      break;
817
19.4M
  };
818
19.4M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
19.4M
                                              float* JXL_RESTRICT scratch) {
695
19.4M
  using Type = AcStrategyType;
696
19.4M
  HWY_ALIGN float warm_block[4 * 4];
697
19.4M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
19.4M
  switch (strategy) {
699
704k
    case Type::DCT16X8: {
700
704k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
704k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
704k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
704k
      break;
704
0
    }
705
757k
    case Type::DCT8X16: {
706
757k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
757k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
757k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
757k
      break;
710
0
    }
711
517k
    case Type::DCT16X16: {
712
517k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
517k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
517k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
517k
      break;
716
0
    }
717
45
    case Type::DCT32X8: {
718
45
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
45
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
45
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
45
      break;
722
0
    }
723
144
    case Type::DCT8X32: {
724
144
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
144
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
144
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
144
      break;
728
0
    }
729
164k
    case Type::DCT32X16: {
730
164k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
164k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
164k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
164k
      break;
734
0
    }
735
172k
    case Type::DCT16X32: {
736
172k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
172k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
172k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
172k
      break;
740
0
    }
741
295k
    case Type::DCT32X32: {
742
295k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
295k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
295k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
295k
      break;
746
0
    }
747
22.4k
    case Type::DCT64X32: {
748
22.4k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
22.4k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
22.4k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
22.4k
      break;
752
0
    }
753
19.0k
    case Type::DCT32X64: {
754
19.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.0k
      break;
758
0
    }
759
166k
    case Type::DCT64X64: {
760
166k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
166k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
166k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
166k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
3
    case Type::DCT64X128: {
772
3
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
3
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
3
      break;
776
0
    }
777
24
    case Type::DCT128X128: {
778
24
      ReinterpretingDCT<
779
24
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
24
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
24
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
24
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.15M
    case Type::DCT:
806
11.3M
    case Type::DCT2X2:
807
11.3M
    case Type::DCT4X4:
808
11.5M
    case Type::DCT4X8:
809
12.0M
    case Type::DCT8X4:
810
12.3M
    case Type::AFV0:
811
12.5M
    case Type::AFV1:
812
12.7M
    case Type::AFV2:
813
12.9M
    case Type::AFV3:
814
16.6M
    case Type::IDENTITY:
815
16.6M
      llf[0] = dc[0];
816
16.6M
      break;
817
19.4M
  };
818
19.4M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_