Coverage Report

Created: 2025-12-13 07:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Writes the LF_ROWSxLF_COLS lowest-frequency coefficients of `output` (a
33
// DCT_ROWS*DCT_COLS-sized DCT block) from a ROWS*COLS scaled DCT of `input`,
34
// rescaling each via DCTTotalResampleScale; `block` receives the DCT result.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.55M
                                  float* JXL_RESTRICT scratch_space) {  // scratch_space is only forwarded to ComputeScaledDCT
41
2.55M
  static_assert(LF_ROWS == ROWS,
42
2.55M
                "ReinterpretingDCT should only be called with LF == N");
43
2.55M
  static_assert(LF_COLS == COLS,
44
2.55M
                "ReinterpretingDCT should only be called with LF == N");
45
2.55M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.55M
                                 scratch_space);
47
2.55M
  if (ROWS < COLS) {  // block is read as LF_ROWS rows with row stride COLS
48
1.96M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.16M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.08M
        output[y * output_stride + x] =
51
3.08M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.08M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.08M
      }
54
1.07M
    }
55
1.66M
  } else {  // ROWS >= COLS: block is read with row stride ROWS and the loop bounds swap
56
5.52M
    for (size_t y = 0; y < LF_COLS; y++) {  // NOTE(review): presumably the DCT result is stored transposed in this case - confirm in dct-inl.h
57
19.8M
      for (size_t x = 0; x < LF_ROWS; x++) {  // output is filled as LF_COLS rows of LF_ROWS values
58
15.9M
        output[y * output_stride + x] =
59
15.9M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
15.9M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
15.9M
      }
62
3.85M
    }
63
1.66M
  }
64
2.55M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
650k
                                  float* JXL_RESTRICT scratch_space) {
41
650k
  static_assert(LF_ROWS == ROWS,
42
650k
                "ReinterpretingDCT should only be called with LF == N");
43
650k
  static_assert(LF_COLS == COLS,
44
650k
                "ReinterpretingDCT should only be called with LF == N");
45
650k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
650k
                                 scratch_space);
47
650k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
650k
  } else {
56
1.30M
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.95M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.30M
        output[y * output_stride + x] =
59
1.30M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.30M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.30M
      }
62
650k
    }
63
650k
  }
64
650k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
726k
                                  float* JXL_RESTRICT scratch_space) {
41
726k
  static_assert(LF_ROWS == ROWS,
42
726k
                "ReinterpretingDCT should only be called with LF == N");
43
726k
  static_assert(LF_COLS == COLS,
44
726k
                "ReinterpretingDCT should only be called with LF == N");
45
726k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
726k
                                 scratch_space);
47
726k
  if (ROWS < COLS) {
48
1.45M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.18M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.45M
        output[y * output_stride + x] =
51
1.45M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.45M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.45M
      }
54
726k
    }
55
726k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
726k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
485k
                                  float* JXL_RESTRICT scratch_space) {
41
485k
  static_assert(LF_ROWS == ROWS,
42
485k
                "ReinterpretingDCT should only be called with LF == N");
43
485k
  static_assert(LF_COLS == COLS,
44
485k
                "ReinterpretingDCT should only be called with LF == N");
45
485k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
485k
                                 scratch_space);
47
485k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
485k
  } else {
56
1.45M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.91M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.94M
        output[y * output_stride + x] =
59
1.94M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.94M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.94M
      }
62
971k
    }
63
485k
  }
64
485k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
693
                                  float* JXL_RESTRICT scratch_space) {
41
693
  static_assert(LF_ROWS == ROWS,
42
693
                "ReinterpretingDCT should only be called with LF == N");
43
693
  static_assert(LF_COLS == COLS,
44
693
                "ReinterpretingDCT should only be called with LF == N");
45
693
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
693
                                 scratch_space);
47
693
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
693
  } else {
56
1.38k
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.46k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.77k
        output[y * output_stride + x] =
59
2.77k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.77k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.77k
      }
62
693
    }
63
693
  }
64
693
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
96
                                  float* JXL_RESTRICT scratch_space) {
41
96
  static_assert(LF_ROWS == ROWS,
42
96
                "ReinterpretingDCT should only be called with LF == N");
43
96
  static_assert(LF_COLS == COLS,
44
96
                "ReinterpretingDCT should only be called with LF == N");
45
96
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
96
                                 scratch_space);
47
96
  if (ROWS < COLS) {
48
192
    for (size_t y = 0; y < LF_ROWS; y++) {
49
480
      for (size_t x = 0; x < LF_COLS; x++) {
50
384
        output[y * output_stride + x] =
51
384
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
384
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
384
      }
54
96
    }
55
96
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
96
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
140k
                                  float* JXL_RESTRICT scratch_space) {
41
140k
  static_assert(LF_ROWS == ROWS,
42
140k
                "ReinterpretingDCT should only be called with LF == N");
43
140k
  static_assert(LF_COLS == COLS,
44
140k
                "ReinterpretingDCT should only be called with LF == N");
45
140k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
140k
                                 scratch_space);
47
140k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
140k
  } else {
56
422k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.40M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.12M
        output[y * output_stride + x] =
59
1.12M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.12M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.12M
      }
62
281k
    }
63
140k
  }
64
140k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
145k
                                  float* JXL_RESTRICT scratch_space) {
41
145k
  static_assert(LF_ROWS == ROWS,
42
145k
                "ReinterpretingDCT should only be called with LF == N");
43
145k
  static_assert(LF_COLS == COLS,
44
145k
                "ReinterpretingDCT should only be called with LF == N");
45
145k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
145k
                                 scratch_space);
47
145k
  if (ROWS < COLS) {
48
435k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.45M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.16M
        output[y * output_stride + x] =
51
1.16M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.16M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.16M
      }
54
290k
    }
55
145k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
145k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
252k
                                  float* JXL_RESTRICT scratch_space) {
41
252k
  static_assert(LF_ROWS == ROWS,
42
252k
                "ReinterpretingDCT should only be called with LF == N");
43
252k
  static_assert(LF_COLS == COLS,
44
252k
                "ReinterpretingDCT should only be called with LF == N");
45
252k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
252k
                                 scratch_space);
47
252k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
252k
  } else {
56
1.26M
    for (size_t y = 0; y < LF_COLS; y++) {
57
5.04M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.03M
        output[y * output_stride + x] =
59
4.03M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.03M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.03M
      }
62
1.00M
    }
63
252k
  }
64
252k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
38.8k
                                  float* JXL_RESTRICT scratch_space) {
41
38.8k
  static_assert(LF_ROWS == ROWS,
42
38.8k
                "ReinterpretingDCT should only be called with LF == N");
43
38.8k
  static_assert(LF_COLS == COLS,
44
38.8k
                "ReinterpretingDCT should only be called with LF == N");
45
38.8k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
38.8k
                                 scratch_space);
47
38.8k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
38.8k
  } else {
56
194k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.39M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.24M
        output[y * output_stride + x] =
59
1.24M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.24M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.24M
      }
62
155k
    }
63
38.8k
  }
64
38.8k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
14.6k
                                  float* JXL_RESTRICT scratch_space) {
41
14.6k
  static_assert(LF_ROWS == ROWS,
42
14.6k
                "ReinterpretingDCT should only be called with LF == N");
43
14.6k
  static_assert(LF_COLS == COLS,
44
14.6k
                "ReinterpretingDCT should only be called with LF == N");
45
14.6k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
14.6k
                                 scratch_space);
47
14.6k
  if (ROWS < COLS) {
48
73.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
528k
      for (size_t x = 0; x < LF_COLS; x++) {
50
469k
        output[y * output_stride + x] =
51
469k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
469k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
469k
      }
54
58.6k
    }
55
14.6k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
14.6k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
98.6k
                                  float* JXL_RESTRICT scratch_space) {
41
98.6k
  static_assert(LF_ROWS == ROWS,
42
98.6k
                "ReinterpretingDCT should only be called with LF == N");
43
98.6k
  static_assert(LF_COLS == COLS,
44
98.6k
                "ReinterpretingDCT should only be called with LF == N");
45
98.6k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
98.6k
                                 scratch_space);
47
98.6k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
98.6k
  } else {
56
887k
    for (size_t y = 0; y < LF_COLS; y++) {
57
7.10M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
6.31M
        output[y * output_stride + x] =
59
6.31M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
6.31M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
6.31M
      }
62
788k
    }
63
98.6k
  }
64
98.6k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
9
                                  float* JXL_RESTRICT scratch_space) {
41
9
  static_assert(LF_ROWS == ROWS,
42
9
                "ReinterpretingDCT should only be called with LF == N");
43
9
  static_assert(LF_COLS == COLS,
44
9
                "ReinterpretingDCT should only be called with LF == N");
45
9
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
9
                                 scratch_space);
47
9
  if (ROWS < COLS) {
48
81
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.22k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.15k
        output[y * output_stride + x] =
51
1.15k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.15k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.15k
      }
54
72
    }
55
9
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
9
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6
                                  float* JXL_RESTRICT scratch_space) {
41
6
  static_assert(LF_ROWS == ROWS,
42
6
                "ReinterpretingDCT should only be called with LF == N");
43
6
  static_assert(LF_COLS == COLS,
44
6
                "ReinterpretingDCT should only be called with LF == N");
45
6
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6
                                 scratch_space);
47
6
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
6
  } else {
56
102
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.63k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.53k
        output[y * output_stride + x] =
59
1.53k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.53k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.53k
      }
62
96
    }
63
6
  }
64
6
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
51.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
51.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
51.0M
  static_assert(S % 2 == 0, "S should be even");
70
51.0M
  float temp[kDCTBlockSize];
71
51.0M
  constexpr size_t num_2x2 = S / 2;
72
170M
  for (size_t y = 0; y < num_2x2; y++) {
73
476M
    for (size_t x = 0; x < num_2x2; x++) {
74
357M
      float c00 = block[y * kBlockDim + x];
75
357M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
357M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
357M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
357M
      float r00 = c00 + c01 + c10 + c11;
79
357M
      float r01 = c00 + c01 - c10 - c11;
80
357M
      float r10 = c00 - c01 + c10 - c11;
81
357M
      float r11 = c00 - c01 - c10 + c11;
82
357M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
357M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
357M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
357M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
357M
    }
87
119M
  }
88
289M
  for (size_t y = 0; y < S; y++) {
89
1.66G
    for (size_t x = 0; x < S; x++) {
90
1.43G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.43G
    }
92
238M
  }
93
51.0M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.0M
  static_assert(S % 2 == 0, "S should be even");
70
11.0M
  float temp[kDCTBlockSize];
71
11.0M
  constexpr size_t num_2x2 = S / 2;
72
22.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
11.0M
      float c00 = block[y * kBlockDim + x];
75
11.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
11.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
11.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
11.0M
      float r00 = c00 + c01 + c10 + c11;
79
11.0M
      float r01 = c00 + c01 - c10 - c11;
80
11.0M
      float r10 = c00 - c01 + c10 - c11;
81
11.0M
      float r11 = c00 - c01 - c10 + c11;
82
11.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
11.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
11.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
11.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
11.0M
    }
87
11.0M
  }
88
33.2M
  for (size_t y = 0; y < S; y++) {
89
66.4M
    for (size_t x = 0; x < S; x++) {
90
44.2M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
44.2M
    }
92
22.1M
  }
93
11.0M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.0M
  static_assert(S % 2 == 0, "S should be even");
70
11.0M
  float temp[kDCTBlockSize];
71
11.0M
  constexpr size_t num_2x2 = S / 2;
72
33.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
66.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
44.2M
      float c00 = block[y * kBlockDim + x];
75
44.2M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
44.2M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
44.2M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
44.2M
      float r00 = c00 + c01 + c10 + c11;
79
44.2M
      float r01 = c00 + c01 - c10 - c11;
80
44.2M
      float r10 = c00 - c01 + c10 - c11;
81
44.2M
      float r11 = c00 - c01 - c10 + c11;
82
44.2M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
44.2M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
44.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
44.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
44.2M
    }
87
22.1M
  }
88
55.3M
  for (size_t y = 0; y < S; y++) {
89
221M
    for (size_t x = 0; x < S; x++) {
90
177M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
177M
    }
92
44.2M
  }
93
11.0M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.0M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.0M
  static_assert(S % 2 == 0, "S should be even");
70
11.0M
  float temp[kDCTBlockSize];
71
11.0M
  constexpr size_t num_2x2 = S / 2;
72
55.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
221M
    for (size_t x = 0; x < num_2x2; x++) {
74
177M
      float c00 = block[y * kBlockDim + x];
75
177M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
177M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
177M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
177M
      float r00 = c00 + c01 + c10 + c11;
79
177M
      float r01 = c00 + c01 - c10 - c11;
80
177M
      float r10 = c00 - c01 + c10 - c11;
81
177M
      float r11 = c00 - c01 - c10 + c11;
82
177M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
177M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
177M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
177M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
177M
    }
87
44.2M
  }
88
99.6M
  for (size_t y = 0; y < S; y++) {
89
796M
    for (size_t x = 0; x < S; x++) {
90
708M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
708M
    }
92
88.5M
  }
93
11.0M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.96M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.96M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.96M
  static_assert(S % 2 == 0, "S should be even");
70
5.96M
  float temp[kDCTBlockSize];
71
5.96M
  constexpr size_t num_2x2 = S / 2;
72
11.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
5.96M
      float c00 = block[y * kBlockDim + x];
75
5.96M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
5.96M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
5.96M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
5.96M
      float r00 = c00 + c01 + c10 + c11;
79
5.96M
      float r01 = c00 + c01 - c10 - c11;
80
5.96M
      float r10 = c00 - c01 + c10 - c11;
81
5.96M
      float r11 = c00 - c01 - c10 + c11;
82
5.96M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
5.96M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
5.96M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
5.96M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
5.96M
    }
87
5.96M
  }
88
17.8M
  for (size_t y = 0; y < S; y++) {
89
35.7M
    for (size_t x = 0; x < S; x++) {
90
23.8M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
23.8M
    }
92
11.9M
  }
93
5.96M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.96M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.96M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.96M
  static_assert(S % 2 == 0, "S should be even");
70
5.96M
  float temp[kDCTBlockSize];
71
5.96M
  constexpr size_t num_2x2 = S / 2;
72
17.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
35.7M
    for (size_t x = 0; x < num_2x2; x++) {
74
23.8M
      float c00 = block[y * kBlockDim + x];
75
23.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
23.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
23.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
23.8M
      float r00 = c00 + c01 + c10 + c11;
79
23.8M
      float r01 = c00 + c01 - c10 - c11;
80
23.8M
      float r10 = c00 - c01 + c10 - c11;
81
23.8M
      float r11 = c00 - c01 - c10 + c11;
82
23.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
23.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
23.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
23.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
23.8M
    }
87
11.9M
  }
88
29.8M
  for (size_t y = 0; y < S; y++) {
89
119M
    for (size_t x = 0; x < S; x++) {
90
95.3M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
95.3M
    }
92
23.8M
  }
93
5.96M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.96M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.96M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.96M
  static_assert(S % 2 == 0, "S should be even");
70
5.96M
  float temp[kDCTBlockSize];
71
5.96M
  constexpr size_t num_2x2 = S / 2;
72
29.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
119M
    for (size_t x = 0; x < num_2x2; x++) {
74
95.3M
      float c00 = block[y * kBlockDim + x];
75
95.3M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
95.3M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
95.3M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
95.3M
      float r00 = c00 + c01 + c10 + c11;
79
95.3M
      float r01 = c00 + c01 - c10 - c11;
80
95.3M
      float r10 = c00 - c01 + c10 - c11;
81
95.3M
      float r11 = c00 - c01 - c10 + c11;
82
95.3M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
95.3M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
95.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
95.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
95.3M
    }
87
23.8M
  }
88
53.6M
  for (size_t y = 0; y < S; y++) {
89
429M
    for (size_t x = 0; x < S; x++) {
90
381M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
381M
    }
92
47.6M
  }
93
5.96M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
45.1M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
45.1M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
45.1M
      {
98
45.1M
          0.25,
99
45.1M
          0.25,
100
45.1M
          0.25,
101
45.1M
          0.25,
102
45.1M
          0.25,
103
45.1M
          0.25,
104
45.1M
          0.25,
105
45.1M
          0.25,
106
45.1M
          0.25,
107
45.1M
          0.25,
108
45.1M
          0.25,
109
45.1M
          0.25,
110
45.1M
          0.25,
111
45.1M
          0.25,
112
45.1M
          0.25,
113
45.1M
          0.25,
114
45.1M
      },
115
45.1M
      {
116
45.1M
          0.876902929799142f,
117
45.1M
          0.2206518106944235f,
118
45.1M
          -0.10140050393753763f,
119
45.1M
          -0.1014005039375375f,
120
45.1M
          0.2206518106944236f,
121
45.1M
          -0.10140050393753777f,
122
45.1M
          -0.10140050393753772f,
123
45.1M
          -0.10140050393753763f,
124
45.1M
          -0.10140050393753758f,
125
45.1M
          -0.10140050393753769f,
126
45.1M
          -0.1014005039375375f,
127
45.1M
          -0.10140050393753768f,
128
45.1M
          -0.10140050393753768f,
129
45.1M
          -0.10140050393753759f,
130
45.1M
          -0.10140050393753763f,
131
45.1M
          -0.10140050393753741f,
132
45.1M
      },
133
45.1M
      {
134
45.1M
          0.0,
135
45.1M
          0.0,
136
45.1M
          0.40670075830260755f,
137
45.1M
          0.44444816619734445f,
138
45.1M
          0.0,
139
45.1M
          0.0,
140
45.1M
          0.19574399372042936f,
141
45.1M
          0.2929100136981264f,
142
45.1M
          -0.40670075830260716f,
143
45.1M
          -0.19574399372042872f,
144
45.1M
          0.0,
145
45.1M
          0.11379074460448091f,
146
45.1M
          -0.44444816619734384f,
147
45.1M
          -0.29291001369812636f,
148
45.1M
          -0.1137907446044814f,
149
45.1M
          0.0,
150
45.1M
      },
151
45.1M
      {
152
45.1M
          0.0,
153
45.1M
          0.0,
154
45.1M
          -0.21255748058288748f,
155
45.1M
          0.3085497062849767f,
156
45.1M
          0.0,
157
45.1M
          0.4706702258572536f,
158
45.1M
          -0.1621205195722993f,
159
45.1M
          0.0,
160
45.1M
          -0.21255748058287047f,
161
45.1M
          -0.16212051957228327f,
162
45.1M
          -0.47067022585725277f,
163
45.1M
          -0.1464291867126764f,
164
45.1M
          0.3085497062849487f,
165
45.1M
          0.0,
166
45.1M
          -0.14642918671266536f,
167
45.1M
          0.4251149611657548f,
168
45.1M
      },
169
45.1M
      {
170
45.1M
          0.0,
171
45.1M
          -0.7071067811865474f,
172
45.1M
          0.0,
173
45.1M
          0.0,
174
45.1M
          0.7071067811865476f,
175
45.1M
          0.0,
176
45.1M
          0.0,
177
45.1M
          0.0,
178
45.1M
          0.0,
179
45.1M
          0.0,
180
45.1M
          0.0,
181
45.1M
          0.0,
182
45.1M
          0.0,
183
45.1M
          0.0,
184
45.1M
          0.0,
185
45.1M
          0.0,
186
45.1M
      },
187
45.1M
      {
188
45.1M
          -0.4105377591765233f,
189
45.1M
          0.6235485373547691f,
190
45.1M
          -0.06435071657946274f,
191
45.1M
          -0.06435071657946266f,
192
45.1M
          0.6235485373547694f,
193
45.1M
          -0.06435071657946284f,
194
45.1M
          -0.0643507165794628f,
195
45.1M
          -0.06435071657946274f,
196
45.1M
          -0.06435071657946272f,
197
45.1M
          -0.06435071657946279f,
198
45.1M
          -0.06435071657946266f,
199
45.1M
          -0.06435071657946277f,
200
45.1M
          -0.06435071657946277f,
201
45.1M
          -0.06435071657946273f,
202
45.1M
          -0.06435071657946274f,
203
45.1M
          -0.0643507165794626f,
204
45.1M
      },
205
45.1M
      {
206
45.1M
          0.0,
207
45.1M
          0.0,
208
45.1M
          -0.4517556589999482f,
209
45.1M
          0.15854503551840063f,
210
45.1M
          0.0,
211
45.1M
          -0.04038515160822202f,
212
45.1M
          0.0074182263792423875f,
213
45.1M
          0.39351034269210167f,
214
45.1M
          -0.45175565899994635f,
215
45.1M
          0.007418226379244351f,
216
45.1M
          0.1107416575309343f,
217
45.1M
          0.08298163094882051f,
218
45.1M
          0.15854503551839705f,
219
45.1M
          0.3935103426921022f,
220
45.1M
          0.0829816309488214f,
221
45.1M
          -0.45175565899994796f,
222
45.1M
      },
223
45.1M
      {
224
45.1M
          0.0,
225
45.1M
          0.0,
226
45.1M
          -0.304684750724869f,
227
45.1M
          0.5112616136591823f,
228
45.1M
          0.0,
229
45.1M
          0.0,
230
45.1M
          -0.290480129728998f,
231
45.1M
          -0.06578701549142804f,
232
45.1M
          0.304684750724884f,
233
45.1M
          0.2904801297290076f,
234
45.1M
          0.0,
235
45.1M
          -0.23889773523344604f,
236
45.1M
          -0.5112616136592012f,
237
45.1M
          0.06578701549142545f,
238
45.1M
          0.23889773523345467f,
239
45.1M
          0.0,
240
45.1M
      },
241
45.1M
      {
242
45.1M
          0.0,
243
45.1M
          0.0,
244
45.1M
          0.3017929516615495f,
245
45.1M
          0.25792362796341184f,
246
45.1M
          0.0,
247
45.1M
          0.16272340142866204f,
248
45.1M
          0.09520022653475037f,
249
45.1M
          0.0,
250
45.1M
          0.3017929516615503f,
251
45.1M
          0.09520022653475055f,
252
45.1M
          -0.16272340142866173f,
253
45.1M
          -0.35312385449816297f,
254
45.1M
          0.25792362796341295f,
255
45.1M
          0.0,
256
45.1M
          -0.3531238544981624f,
257
45.1M
          -0.6035859033230976f,
258
45.1M
      },
259
45.1M
      {
260
45.1M
          0.0,
261
45.1M
          0.0,
262
45.1M
          0.40824829046386274f,
263
45.1M
          0.0,
264
45.1M
          0.0,
265
45.1M
          0.0,
266
45.1M
          0.0,
267
45.1M
          -0.4082482904638628f,
268
45.1M
          -0.4082482904638635f,
269
45.1M
          0.0,
270
45.1M
          0.0,
271
45.1M
          -0.40824829046386296f,
272
45.1M
          0.0,
273
45.1M
          0.4082482904638634f,
274
45.1M
          0.408248290463863f,
275
45.1M
          0.0,
276
45.1M
      },
277
45.1M
      {
278
45.1M
          0.0,
279
45.1M
          0.0,
280
45.1M
          0.1747866975480809f,
281
45.1M
          0.0812611176717539f,
282
45.1M
          0.0,
283
45.1M
          0.0,
284
45.1M
          -0.3675398009862027f,
285
45.1M
          -0.307882213957909f,
286
45.1M
          -0.17478669754808135f,
287
45.1M
          0.3675398009862011f,
288
45.1M
          0.0,
289
45.1M
          0.4826689115059883f,
290
45.1M
          -0.08126111767175039f,
291
45.1M
          0.30788221395790305f,
292
45.1M
          -0.48266891150598584f,
293
45.1M
          0.0,
294
45.1M
      },
295
45.1M
      {
296
45.1M
          0.0,
297
45.1M
          0.0,
298
45.1M
          -0.21105601049335784f,
299
45.1M
          0.18567180916109802f,
300
45.1M
          0.0,
301
45.1M
          0.0,
302
45.1M
          0.49215859013738733f,
303
45.1M
          -0.38525013709251915f,
304
45.1M
          0.21105601049335806f,
305
45.1M
          -0.49215859013738905f,
306
45.1M
          0.0,
307
45.1M
          0.17419412659916217f,
308
45.1M
          -0.18567180916109904f,
309
45.1M
          0.3852501370925211f,
310
45.1M
          -0.1741941265991621f,
311
45.1M
          0.0,
312
45.1M
      },
313
45.1M
      {
314
45.1M
          0.0,
315
45.1M
          0.0,
316
45.1M
          -0.14266084808807264f,
317
45.1M
          -0.3416446842253372f,
318
45.1M
          0.0,
319
45.1M
          0.7367497537172237f,
320
45.1M
          0.24627107722075148f,
321
45.1M
          -0.08574019035519306f,
322
45.1M
          -0.14266084808807344f,
323
45.1M
          0.24627107722075137f,
324
45.1M
          0.14883399227113567f,
325
45.1M
          -0.04768680350229251f,
326
45.1M
          -0.3416446842253373f,
327
45.1M
          -0.08574019035519267f,
328
45.1M
          -0.047686803502292804f,
329
45.1M
          -0.14266084808807242f,
330
45.1M
      },
331
45.1M
      {
332
45.1M
          0.0,
333
45.1M
          0.0,
334
45.1M
          -0.13813540350758585f,
335
45.1M
          0.3302282550303788f,
336
45.1M
          0.0,
337
45.1M
          0.08755115000587084f,
338
45.1M
          -0.07946706605909573f,
339
45.1M
          -0.4613374887461511f,
340
45.1M
          -0.13813540350758294f,
341
45.1M
          -0.07946706605910261f,
342
45.1M
          0.49724647109535086f,
343
45.1M
          0.12538059448563663f,
344
45.1M
          0.3302282550303805f,
345
45.1M
          -0.4613374887461554f,
346
45.1M
          0.12538059448564315f,
347
45.1M
          -0.13813540350758452f,
348
45.1M
      },
349
45.1M
      {
350
45.1M
          0.0,
351
45.1M
          0.0,
352
45.1M
          -0.17437602599651067f,
353
45.1M
          0.0702790691196284f,
354
45.1M
          0.0,
355
45.1M
          -0.2921026642334881f,
356
45.1M
          0.3623817333531167f,
357
45.1M
          0.0,
358
45.1M
          -0.1743760259965108f,
359
45.1M
          0.36238173335311646f,
360
45.1M
          0.29210266423348785f,
361
45.1M
          -0.4326608024727445f,
362
45.1M
          0.07027906911962818f,
363
45.1M
          0.0,
364
45.1M
          -0.4326608024727457f,
365
45.1M
          0.34875205199302267f,
366
45.1M
      },
367
45.1M
      {
368
45.1M
          0.0,
369
45.1M
          0.0,
370
45.1M
          0.11354987314994337f,
371
45.1M
          -0.07417504595810355f,
372
45.1M
          0.0,
373
45.1M
          0.19402893032594343f,
374
45.1M
          -0.435190496523228f,
375
45.1M
          0.21918684838857466f,
376
45.1M
          0.11354987314994257f,
377
45.1M
          -0.4351904965232251f,
378
45.1M
          0.5550443808910661f,
379
45.1M
          -0.25468277124066463f,
380
45.1M
          -0.07417504595810233f,
381
45.1M
          0.2191868483885728f,
382
45.1M
          -0.25468277124066413f,
383
45.1M
          0.1135498731499429f,
384
45.1M
      },
385
45.1M
  };
386
387
45.1M
  const HWY_CAPPED(float, 16) d;
388
135M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
90.3M
    auto pixel = Zero(d);
390
1.53G
    for (size_t j = 0; j < 16; j++) {
391
1.44G
      auto cf = Set(d, coeffs[j]);
392
1.44G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.44G
      pixel = MulAdd(cf, basis, pixel);
394
1.44G
    }
395
90.3M
    Store(pixel, d, pixels + i);
396
90.3M
  }
397
45.1M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
44.2M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
44.2M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
44.2M
      {
98
44.2M
          0.25,
99
44.2M
          0.25,
100
44.2M
          0.25,
101
44.2M
          0.25,
102
44.2M
          0.25,
103
44.2M
          0.25,
104
44.2M
          0.25,
105
44.2M
          0.25,
106
44.2M
          0.25,
107
44.2M
          0.25,
108
44.2M
          0.25,
109
44.2M
          0.25,
110
44.2M
          0.25,
111
44.2M
          0.25,
112
44.2M
          0.25,
113
44.2M
          0.25,
114
44.2M
      },
115
44.2M
      {
116
44.2M
          0.876902929799142f,
117
44.2M
          0.2206518106944235f,
118
44.2M
          -0.10140050393753763f,
119
44.2M
          -0.1014005039375375f,
120
44.2M
          0.2206518106944236f,
121
44.2M
          -0.10140050393753777f,
122
44.2M
          -0.10140050393753772f,
123
44.2M
          -0.10140050393753763f,
124
44.2M
          -0.10140050393753758f,
125
44.2M
          -0.10140050393753769f,
126
44.2M
          -0.1014005039375375f,
127
44.2M
          -0.10140050393753768f,
128
44.2M
          -0.10140050393753768f,
129
44.2M
          -0.10140050393753759f,
130
44.2M
          -0.10140050393753763f,
131
44.2M
          -0.10140050393753741f,
132
44.2M
      },
133
44.2M
      {
134
44.2M
          0.0,
135
44.2M
          0.0,
136
44.2M
          0.40670075830260755f,
137
44.2M
          0.44444816619734445f,
138
44.2M
          0.0,
139
44.2M
          0.0,
140
44.2M
          0.19574399372042936f,
141
44.2M
          0.2929100136981264f,
142
44.2M
          -0.40670075830260716f,
143
44.2M
          -0.19574399372042872f,
144
44.2M
          0.0,
145
44.2M
          0.11379074460448091f,
146
44.2M
          -0.44444816619734384f,
147
44.2M
          -0.29291001369812636f,
148
44.2M
          -0.1137907446044814f,
149
44.2M
          0.0,
150
44.2M
      },
151
44.2M
      {
152
44.2M
          0.0,
153
44.2M
          0.0,
154
44.2M
          -0.21255748058288748f,
155
44.2M
          0.3085497062849767f,
156
44.2M
          0.0,
157
44.2M
          0.4706702258572536f,
158
44.2M
          -0.1621205195722993f,
159
44.2M
          0.0,
160
44.2M
          -0.21255748058287047f,
161
44.2M
          -0.16212051957228327f,
162
44.2M
          -0.47067022585725277f,
163
44.2M
          -0.1464291867126764f,
164
44.2M
          0.3085497062849487f,
165
44.2M
          0.0,
166
44.2M
          -0.14642918671266536f,
167
44.2M
          0.4251149611657548f,
168
44.2M
      },
169
44.2M
      {
170
44.2M
          0.0,
171
44.2M
          -0.7071067811865474f,
172
44.2M
          0.0,
173
44.2M
          0.0,
174
44.2M
          0.7071067811865476f,
175
44.2M
          0.0,
176
44.2M
          0.0,
177
44.2M
          0.0,
178
44.2M
          0.0,
179
44.2M
          0.0,
180
44.2M
          0.0,
181
44.2M
          0.0,
182
44.2M
          0.0,
183
44.2M
          0.0,
184
44.2M
          0.0,
185
44.2M
          0.0,
186
44.2M
      },
187
44.2M
      {
188
44.2M
          -0.4105377591765233f,
189
44.2M
          0.6235485373547691f,
190
44.2M
          -0.06435071657946274f,
191
44.2M
          -0.06435071657946266f,
192
44.2M
          0.6235485373547694f,
193
44.2M
          -0.06435071657946284f,
194
44.2M
          -0.0643507165794628f,
195
44.2M
          -0.06435071657946274f,
196
44.2M
          -0.06435071657946272f,
197
44.2M
          -0.06435071657946279f,
198
44.2M
          -0.06435071657946266f,
199
44.2M
          -0.06435071657946277f,
200
44.2M
          -0.06435071657946277f,
201
44.2M
          -0.06435071657946273f,
202
44.2M
          -0.06435071657946274f,
203
44.2M
          -0.0643507165794626f,
204
44.2M
      },
205
44.2M
      {
206
44.2M
          0.0,
207
44.2M
          0.0,
208
44.2M
          -0.4517556589999482f,
209
44.2M
          0.15854503551840063f,
210
44.2M
          0.0,
211
44.2M
          -0.04038515160822202f,
212
44.2M
          0.0074182263792423875f,
213
44.2M
          0.39351034269210167f,
214
44.2M
          -0.45175565899994635f,
215
44.2M
          0.007418226379244351f,
216
44.2M
          0.1107416575309343f,
217
44.2M
          0.08298163094882051f,
218
44.2M
          0.15854503551839705f,
219
44.2M
          0.3935103426921022f,
220
44.2M
          0.0829816309488214f,
221
44.2M
          -0.45175565899994796f,
222
44.2M
      },
223
44.2M
      {
224
44.2M
          0.0,
225
44.2M
          0.0,
226
44.2M
          -0.304684750724869f,
227
44.2M
          0.5112616136591823f,
228
44.2M
          0.0,
229
44.2M
          0.0,
230
44.2M
          -0.290480129728998f,
231
44.2M
          -0.06578701549142804f,
232
44.2M
          0.304684750724884f,
233
44.2M
          0.2904801297290076f,
234
44.2M
          0.0,
235
44.2M
          -0.23889773523344604f,
236
44.2M
          -0.5112616136592012f,
237
44.2M
          0.06578701549142545f,
238
44.2M
          0.23889773523345467f,
239
44.2M
          0.0,
240
44.2M
      },
241
44.2M
      {
242
44.2M
          0.0,
243
44.2M
          0.0,
244
44.2M
          0.3017929516615495f,
245
44.2M
          0.25792362796341184f,
246
44.2M
          0.0,
247
44.2M
          0.16272340142866204f,
248
44.2M
          0.09520022653475037f,
249
44.2M
          0.0,
250
44.2M
          0.3017929516615503f,
251
44.2M
          0.09520022653475055f,
252
44.2M
          -0.16272340142866173f,
253
44.2M
          -0.35312385449816297f,
254
44.2M
          0.25792362796341295f,
255
44.2M
          0.0,
256
44.2M
          -0.3531238544981624f,
257
44.2M
          -0.6035859033230976f,
258
44.2M
      },
259
44.2M
      {
260
44.2M
          0.0,
261
44.2M
          0.0,
262
44.2M
          0.40824829046386274f,
263
44.2M
          0.0,
264
44.2M
          0.0,
265
44.2M
          0.0,
266
44.2M
          0.0,
267
44.2M
          -0.4082482904638628f,
268
44.2M
          -0.4082482904638635f,
269
44.2M
          0.0,
270
44.2M
          0.0,
271
44.2M
          -0.40824829046386296f,
272
44.2M
          0.0,
273
44.2M
          0.4082482904638634f,
274
44.2M
          0.408248290463863f,
275
44.2M
          0.0,
276
44.2M
      },
277
44.2M
      {
278
44.2M
          0.0,
279
44.2M
          0.0,
280
44.2M
          0.1747866975480809f,
281
44.2M
          0.0812611176717539f,
282
44.2M
          0.0,
283
44.2M
          0.0,
284
44.2M
          -0.3675398009862027f,
285
44.2M
          -0.307882213957909f,
286
44.2M
          -0.17478669754808135f,
287
44.2M
          0.3675398009862011f,
288
44.2M
          0.0,
289
44.2M
          0.4826689115059883f,
290
44.2M
          -0.08126111767175039f,
291
44.2M
          0.30788221395790305f,
292
44.2M
          -0.48266891150598584f,
293
44.2M
          0.0,
294
44.2M
      },
295
44.2M
      {
296
44.2M
          0.0,
297
44.2M
          0.0,
298
44.2M
          -0.21105601049335784f,
299
44.2M
          0.18567180916109802f,
300
44.2M
          0.0,
301
44.2M
          0.0,
302
44.2M
          0.49215859013738733f,
303
44.2M
          -0.38525013709251915f,
304
44.2M
          0.21105601049335806f,
305
44.2M
          -0.49215859013738905f,
306
44.2M
          0.0,
307
44.2M
          0.17419412659916217f,
308
44.2M
          -0.18567180916109904f,
309
44.2M
          0.3852501370925211f,
310
44.2M
          -0.1741941265991621f,
311
44.2M
          0.0,
312
44.2M
      },
313
44.2M
      {
314
44.2M
          0.0,
315
44.2M
          0.0,
316
44.2M
          -0.14266084808807264f,
317
44.2M
          -0.3416446842253372f,
318
44.2M
          0.0,
319
44.2M
          0.7367497537172237f,
320
44.2M
          0.24627107722075148f,
321
44.2M
          -0.08574019035519306f,
322
44.2M
          -0.14266084808807344f,
323
44.2M
          0.24627107722075137f,
324
44.2M
          0.14883399227113567f,
325
44.2M
          -0.04768680350229251f,
326
44.2M
          -0.3416446842253373f,
327
44.2M
          -0.08574019035519267f,
328
44.2M
          -0.047686803502292804f,
329
44.2M
          -0.14266084808807242f,
330
44.2M
      },
331
44.2M
      {
332
44.2M
          0.0,
333
44.2M
          0.0,
334
44.2M
          -0.13813540350758585f,
335
44.2M
          0.3302282550303788f,
336
44.2M
          0.0,
337
44.2M
          0.08755115000587084f,
338
44.2M
          -0.07946706605909573f,
339
44.2M
          -0.4613374887461511f,
340
44.2M
          -0.13813540350758294f,
341
44.2M
          -0.07946706605910261f,
342
44.2M
          0.49724647109535086f,
343
44.2M
          0.12538059448563663f,
344
44.2M
          0.3302282550303805f,
345
44.2M
          -0.4613374887461554f,
346
44.2M
          0.12538059448564315f,
347
44.2M
          -0.13813540350758452f,
348
44.2M
      },
349
44.2M
      {
350
44.2M
          0.0,
351
44.2M
          0.0,
352
44.2M
          -0.17437602599651067f,
353
44.2M
          0.0702790691196284f,
354
44.2M
          0.0,
355
44.2M
          -0.2921026642334881f,
356
44.2M
          0.3623817333531167f,
357
44.2M
          0.0,
358
44.2M
          -0.1743760259965108f,
359
44.2M
          0.36238173335311646f,
360
44.2M
          0.29210266423348785f,
361
44.2M
          -0.4326608024727445f,
362
44.2M
          0.07027906911962818f,
363
44.2M
          0.0,
364
44.2M
          -0.4326608024727457f,
365
44.2M
          0.34875205199302267f,
366
44.2M
      },
367
44.2M
      {
368
44.2M
          0.0,
369
44.2M
          0.0,
370
44.2M
          0.11354987314994337f,
371
44.2M
          -0.07417504595810355f,
372
44.2M
          0.0,
373
44.2M
          0.19402893032594343f,
374
44.2M
          -0.435190496523228f,
375
44.2M
          0.21918684838857466f,
376
44.2M
          0.11354987314994257f,
377
44.2M
          -0.4351904965232251f,
378
44.2M
          0.5550443808910661f,
379
44.2M
          -0.25468277124066463f,
380
44.2M
          -0.07417504595810233f,
381
44.2M
          0.2191868483885728f,
382
44.2M
          -0.25468277124066413f,
383
44.2M
          0.1135498731499429f,
384
44.2M
      },
385
44.2M
  };
386
387
44.2M
  const HWY_CAPPED(float, 16) d;
388
132M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
88.5M
    auto pixel = Zero(d);
390
1.50G
    for (size_t j = 0; j < 16; j++) {
391
1.41G
      auto cf = Set(d, coeffs[j]);
392
1.41G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.41G
      pixel = MulAdd(cf, basis, pixel);
394
1.41G
    }
395
88.5M
    Store(pixel, d, pixels + i);
396
88.5M
  }
397
44.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
899k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
899k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
899k
      {
98
899k
          0.25,
99
899k
          0.25,
100
899k
          0.25,
101
899k
          0.25,
102
899k
          0.25,
103
899k
          0.25,
104
899k
          0.25,
105
899k
          0.25,
106
899k
          0.25,
107
899k
          0.25,
108
899k
          0.25,
109
899k
          0.25,
110
899k
          0.25,
111
899k
          0.25,
112
899k
          0.25,
113
899k
          0.25,
114
899k
      },
115
899k
      {
116
899k
          0.876902929799142f,
117
899k
          0.2206518106944235f,
118
899k
          -0.10140050393753763f,
119
899k
          -0.1014005039375375f,
120
899k
          0.2206518106944236f,
121
899k
          -0.10140050393753777f,
122
899k
          -0.10140050393753772f,
123
899k
          -0.10140050393753763f,
124
899k
          -0.10140050393753758f,
125
899k
          -0.10140050393753769f,
126
899k
          -0.1014005039375375f,
127
899k
          -0.10140050393753768f,
128
899k
          -0.10140050393753768f,
129
899k
          -0.10140050393753759f,
130
899k
          -0.10140050393753763f,
131
899k
          -0.10140050393753741f,
132
899k
      },
133
899k
      {
134
899k
          0.0,
135
899k
          0.0,
136
899k
          0.40670075830260755f,
137
899k
          0.44444816619734445f,
138
899k
          0.0,
139
899k
          0.0,
140
899k
          0.19574399372042936f,
141
899k
          0.2929100136981264f,
142
899k
          -0.40670075830260716f,
143
899k
          -0.19574399372042872f,
144
899k
          0.0,
145
899k
          0.11379074460448091f,
146
899k
          -0.44444816619734384f,
147
899k
          -0.29291001369812636f,
148
899k
          -0.1137907446044814f,
149
899k
          0.0,
150
899k
      },
151
899k
      {
152
899k
          0.0,
153
899k
          0.0,
154
899k
          -0.21255748058288748f,
155
899k
          0.3085497062849767f,
156
899k
          0.0,
157
899k
          0.4706702258572536f,
158
899k
          -0.1621205195722993f,
159
899k
          0.0,
160
899k
          -0.21255748058287047f,
161
899k
          -0.16212051957228327f,
162
899k
          -0.47067022585725277f,
163
899k
          -0.1464291867126764f,
164
899k
          0.3085497062849487f,
165
899k
          0.0,
166
899k
          -0.14642918671266536f,
167
899k
          0.4251149611657548f,
168
899k
      },
169
899k
      {
170
899k
          0.0,
171
899k
          -0.7071067811865474f,
172
899k
          0.0,
173
899k
          0.0,
174
899k
          0.7071067811865476f,
175
899k
          0.0,
176
899k
          0.0,
177
899k
          0.0,
178
899k
          0.0,
179
899k
          0.0,
180
899k
          0.0,
181
899k
          0.0,
182
899k
          0.0,
183
899k
          0.0,
184
899k
          0.0,
185
899k
          0.0,
186
899k
      },
187
899k
      {
188
899k
          -0.4105377591765233f,
189
899k
          0.6235485373547691f,
190
899k
          -0.06435071657946274f,
191
899k
          -0.06435071657946266f,
192
899k
          0.6235485373547694f,
193
899k
          -0.06435071657946284f,
194
899k
          -0.0643507165794628f,
195
899k
          -0.06435071657946274f,
196
899k
          -0.06435071657946272f,
197
899k
          -0.06435071657946279f,
198
899k
          -0.06435071657946266f,
199
899k
          -0.06435071657946277f,
200
899k
          -0.06435071657946277f,
201
899k
          -0.06435071657946273f,
202
899k
          -0.06435071657946274f,
203
899k
          -0.0643507165794626f,
204
899k
      },
205
899k
      {
206
899k
          0.0,
207
899k
          0.0,
208
899k
          -0.4517556589999482f,
209
899k
          0.15854503551840063f,
210
899k
          0.0,
211
899k
          -0.04038515160822202f,
212
899k
          0.0074182263792423875f,
213
899k
          0.39351034269210167f,
214
899k
          -0.45175565899994635f,
215
899k
          0.007418226379244351f,
216
899k
          0.1107416575309343f,
217
899k
          0.08298163094882051f,
218
899k
          0.15854503551839705f,
219
899k
          0.3935103426921022f,
220
899k
          0.0829816309488214f,
221
899k
          -0.45175565899994796f,
222
899k
      },
223
899k
      {
224
899k
          0.0,
225
899k
          0.0,
226
899k
          -0.304684750724869f,
227
899k
          0.5112616136591823f,
228
899k
          0.0,
229
899k
          0.0,
230
899k
          -0.290480129728998f,
231
899k
          -0.06578701549142804f,
232
899k
          0.304684750724884f,
233
899k
          0.2904801297290076f,
234
899k
          0.0,
235
899k
          -0.23889773523344604f,
236
899k
          -0.5112616136592012f,
237
899k
          0.06578701549142545f,
238
899k
          0.23889773523345467f,
239
899k
          0.0,
240
899k
      },
241
899k
      {
242
899k
          0.0,
243
899k
          0.0,
244
899k
          0.3017929516615495f,
245
899k
          0.25792362796341184f,
246
899k
          0.0,
247
899k
          0.16272340142866204f,
248
899k
          0.09520022653475037f,
249
899k
          0.0,
250
899k
          0.3017929516615503f,
251
899k
          0.09520022653475055f,
252
899k
          -0.16272340142866173f,
253
899k
          -0.35312385449816297f,
254
899k
          0.25792362796341295f,
255
899k
          0.0,
256
899k
          -0.3531238544981624f,
257
899k
          -0.6035859033230976f,
258
899k
      },
259
899k
      {
260
899k
          0.0,
261
899k
          0.0,
262
899k
          0.40824829046386274f,
263
899k
          0.0,
264
899k
          0.0,
265
899k
          0.0,
266
899k
          0.0,
267
899k
          -0.4082482904638628f,
268
899k
          -0.4082482904638635f,
269
899k
          0.0,
270
899k
          0.0,
271
899k
          -0.40824829046386296f,
272
899k
          0.0,
273
899k
          0.4082482904638634f,
274
899k
          0.408248290463863f,
275
899k
          0.0,
276
899k
      },
277
899k
      {
278
899k
          0.0,
279
899k
          0.0,
280
899k
          0.1747866975480809f,
281
899k
          0.0812611176717539f,
282
899k
          0.0,
283
899k
          0.0,
284
899k
          -0.3675398009862027f,
285
899k
          -0.307882213957909f,
286
899k
          -0.17478669754808135f,
287
899k
          0.3675398009862011f,
288
899k
          0.0,
289
899k
          0.4826689115059883f,
290
899k
          -0.08126111767175039f,
291
899k
          0.30788221395790305f,
292
899k
          -0.48266891150598584f,
293
899k
          0.0,
294
899k
      },
295
899k
      {
296
899k
          0.0,
297
899k
          0.0,
298
899k
          -0.21105601049335784f,
299
899k
          0.18567180916109802f,
300
899k
          0.0,
301
899k
          0.0,
302
899k
          0.49215859013738733f,
303
899k
          -0.38525013709251915f,
304
899k
          0.21105601049335806f,
305
899k
          -0.49215859013738905f,
306
899k
          0.0,
307
899k
          0.17419412659916217f,
308
899k
          -0.18567180916109904f,
309
899k
          0.3852501370925211f,
310
899k
          -0.1741941265991621f,
311
899k
          0.0,
312
899k
      },
313
899k
      {
314
899k
          0.0,
315
899k
          0.0,
316
899k
          -0.14266084808807264f,
317
899k
          -0.3416446842253372f,
318
899k
          0.0,
319
899k
          0.7367497537172237f,
320
899k
          0.24627107722075148f,
321
899k
          -0.08574019035519306f,
322
899k
          -0.14266084808807344f,
323
899k
          0.24627107722075137f,
324
899k
          0.14883399227113567f,
325
899k
          -0.04768680350229251f,
326
899k
          -0.3416446842253373f,
327
899k
          -0.08574019035519267f,
328
899k
          -0.047686803502292804f,
329
899k
          -0.14266084808807242f,
330
899k
      },
331
899k
      {
332
899k
          0.0,
333
899k
          0.0,
334
899k
          -0.13813540350758585f,
335
899k
          0.3302282550303788f,
336
899k
          0.0,
337
899k
          0.08755115000587084f,
338
899k
          -0.07946706605909573f,
339
899k
          -0.4613374887461511f,
340
899k
          -0.13813540350758294f,
341
899k
          -0.07946706605910261f,
342
899k
          0.49724647109535086f,
343
899k
          0.12538059448563663f,
344
899k
          0.3302282550303805f,
345
899k
          -0.4613374887461554f,
346
899k
          0.12538059448564315f,
347
899k
          -0.13813540350758452f,
348
899k
      },
349
899k
      {
350
899k
          0.0,
351
899k
          0.0,
352
899k
          -0.17437602599651067f,
353
899k
          0.0702790691196284f,
354
899k
          0.0,
355
899k
          -0.2921026642334881f,
356
899k
          0.3623817333531167f,
357
899k
          0.0,
358
899k
          -0.1743760259965108f,
359
899k
          0.36238173335311646f,
360
899k
          0.29210266423348785f,
361
899k
          -0.4326608024727445f,
362
899k
          0.07027906911962818f,
363
899k
          0.0,
364
899k
          -0.4326608024727457f,
365
899k
          0.34875205199302267f,
366
899k
      },
367
899k
      {
368
899k
          0.0,
369
899k
          0.0,
370
899k
          0.11354987314994337f,
371
899k
          -0.07417504595810355f,
372
899k
          0.0,
373
899k
          0.19402893032594343f,
374
899k
          -0.435190496523228f,
375
899k
          0.21918684838857466f,
376
899k
          0.11354987314994257f,
377
899k
          -0.4351904965232251f,
378
899k
          0.5550443808910661f,
379
899k
          -0.25468277124066463f,
380
899k
          -0.07417504595810233f,
381
899k
          0.2191868483885728f,
382
899k
          -0.25468277124066413f,
383
899k
          0.1135498731499429f,
384
899k
      },
385
899k
  };
386
387
899k
  const HWY_CAPPED(float, 16) d;
388
2.69M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.79M
    auto pixel = Zero(d);
390
30.5M
    for (size_t j = 0; j < 16; j++) {
391
28.7M
      auto cf = Set(d, coeffs[j]);
392
28.7M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
28.7M
      pixel = MulAdd(cf, basis, pixel);
394
28.7M
    }
395
1.79M
    Store(pixel, d, pixels + i);
396
1.79M
  }
397
899k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
45.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
45.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
45.1M
  size_t afv_x = afv_kind & 1;
404
45.1M
  size_t afv_y = afv_kind / 2;
405
45.1M
  float dcs[3] = {};
406
45.1M
  float block00 = coefficients[0];
407
45.1M
  float block01 = coefficients[1];
408
45.1M
  float block10 = coefficients[8];
409
45.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
45.1M
  dcs[1] = (block00 + block10 - block01);
411
45.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
45.1M
  HWY_ALIGN float coeff[4 * 4];
414
45.1M
  coeff[0] = dcs[0];
415
225M
  for (size_t iy = 0; iy < 4; iy++) {
416
903M
    for (size_t ix = 0; ix < 4; ix++) {
417
722M
      if (ix == 0 && iy == 0) continue;
418
677M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
677M
    }
420
180M
  }
421
45.1M
  HWY_ALIGN float block[4 * 8];
422
45.1M
  AFVIDCT4x4(coeff, block);
423
225M
  for (size_t iy = 0; iy < 4; iy++) {
424
903M
    for (size_t ix = 0; ix < 4; ix++) {
425
722M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
722M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
722M
    }
428
180M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
45.1M
  block[0] = dcs[1];
431
225M
  for (size_t iy = 0; iy < 4; iy++) {
432
903M
    for (size_t ix = 0; ix < 4; ix++) {
433
722M
      if (ix == 0 && iy == 0) continue;
434
677M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
677M
    }
436
180M
  }
437
45.1M
  ComputeScaledIDCT<4, 4>()(
438
45.1M
      block,
439
45.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
45.1M
            pixels_stride),
441
45.1M
      scratch_space);
442
  // IDCT4x8.
443
45.1M
  block[0] = dcs[2];
444
225M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.62G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.44G
      if (ix == 0 && iy == 0) continue;
447
1.40G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.40G
    }
449
180M
  }
450
45.1M
  ComputeScaledIDCT<4, 8>()(
451
45.1M
      block,
452
45.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
45.1M
      scratch_space);
454
45.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.0M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.0M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.0M
  size_t afv_x = afv_kind & 1;
404
11.0M
  size_t afv_y = afv_kind / 2;
405
11.0M
  float dcs[3] = {};
406
11.0M
  float block00 = coefficients[0];
407
11.0M
  float block01 = coefficients[1];
408
11.0M
  float block10 = coefficients[8];
409
11.0M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.0M
  dcs[1] = (block00 + block10 - block01);
411
11.0M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.0M
  HWY_ALIGN float coeff[4 * 4];
414
11.0M
  coeff[0] = dcs[0];
415
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
416
221M
    for (size_t ix = 0; ix < 4; ix++) {
417
177M
      if (ix == 0 && iy == 0) continue;
418
166M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
166M
    }
420
44.2M
  }
421
11.0M
  HWY_ALIGN float block[4 * 8];
422
11.0M
  AFVIDCT4x4(coeff, block);
423
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
424
221M
    for (size_t ix = 0; ix < 4; ix++) {
425
177M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
177M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
177M
    }
428
44.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.0M
  block[0] = dcs[1];
431
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
432
221M
    for (size_t ix = 0; ix < 4; ix++) {
433
177M
      if (ix == 0 && iy == 0) continue;
434
166M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
166M
    }
436
44.2M
  }
437
11.0M
  ComputeScaledIDCT<4, 4>()(
438
11.0M
      block,
439
11.0M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.0M
            pixels_stride),
441
11.0M
      scratch_space);
442
  // IDCT4x8.
443
11.0M
  block[0] = dcs[2];
444
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
445
398M
    for (size_t ix = 0; ix < 8; ix++) {
446
354M
      if (ix == 0 && iy == 0) continue;
447
343M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
343M
    }
449
44.2M
  }
450
11.0M
  ComputeScaledIDCT<4, 8>()(
451
11.0M
      block,
452
11.0M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.0M
      scratch_space);
454
11.0M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.0M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.0M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.0M
  size_t afv_x = afv_kind & 1;
404
11.0M
  size_t afv_y = afv_kind / 2;
405
11.0M
  float dcs[3] = {};
406
11.0M
  float block00 = coefficients[0];
407
11.0M
  float block01 = coefficients[1];
408
11.0M
  float block10 = coefficients[8];
409
11.0M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.0M
  dcs[1] = (block00 + block10 - block01);
411
11.0M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.0M
  HWY_ALIGN float coeff[4 * 4];
414
11.0M
  coeff[0] = dcs[0];
415
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
416
221M
    for (size_t ix = 0; ix < 4; ix++) {
417
177M
      if (ix == 0 && iy == 0) continue;
418
166M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
166M
    }
420
44.2M
  }
421
11.0M
  HWY_ALIGN float block[4 * 8];
422
11.0M
  AFVIDCT4x4(coeff, block);
423
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
424
221M
    for (size_t ix = 0; ix < 4; ix++) {
425
177M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
177M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
177M
    }
428
44.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.0M
  block[0] = dcs[1];
431
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
432
221M
    for (size_t ix = 0; ix < 4; ix++) {
433
177M
      if (ix == 0 && iy == 0) continue;
434
166M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
166M
    }
436
44.2M
  }
437
11.0M
  ComputeScaledIDCT<4, 4>()(
438
11.0M
      block,
439
11.0M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.0M
            pixels_stride),
441
11.0M
      scratch_space);
442
  // IDCT4x8.
443
11.0M
  block[0] = dcs[2];
444
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
445
398M
    for (size_t ix = 0; ix < 8; ix++) {
446
354M
      if (ix == 0 && iy == 0) continue;
447
343M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
343M
    }
449
44.2M
  }
450
11.0M
  ComputeScaledIDCT<4, 8>()(
451
11.0M
      block,
452
11.0M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.0M
      scratch_space);
454
11.0M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.0M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.0M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.0M
  size_t afv_x = afv_kind & 1;
404
11.0M
  size_t afv_y = afv_kind / 2;
405
11.0M
  float dcs[3] = {};
406
11.0M
  float block00 = coefficients[0];
407
11.0M
  float block01 = coefficients[1];
408
11.0M
  float block10 = coefficients[8];
409
11.0M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.0M
  dcs[1] = (block00 + block10 - block01);
411
11.0M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.0M
  HWY_ALIGN float coeff[4 * 4];
414
11.0M
  coeff[0] = dcs[0];
415
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
416
221M
    for (size_t ix = 0; ix < 4; ix++) {
417
177M
      if (ix == 0 && iy == 0) continue;
418
166M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
166M
    }
420
44.2M
  }
421
11.0M
  HWY_ALIGN float block[4 * 8];
422
11.0M
  AFVIDCT4x4(coeff, block);
423
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
424
221M
    for (size_t ix = 0; ix < 4; ix++) {
425
177M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
177M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
177M
    }
428
44.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.0M
  block[0] = dcs[1];
431
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
432
221M
    for (size_t ix = 0; ix < 4; ix++) {
433
177M
      if (ix == 0 && iy == 0) continue;
434
166M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
166M
    }
436
44.2M
  }
437
11.0M
  ComputeScaledIDCT<4, 4>()(
438
11.0M
      block,
439
11.0M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.0M
            pixels_stride),
441
11.0M
      scratch_space);
442
  // IDCT4x8.
443
11.0M
  block[0] = dcs[2];
444
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
445
398M
    for (size_t ix = 0; ix < 8; ix++) {
446
354M
      if (ix == 0 && iy == 0) continue;
447
343M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
343M
    }
449
44.2M
  }
450
11.0M
  ComputeScaledIDCT<4, 8>()(
451
11.0M
      block,
452
11.0M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.0M
      scratch_space);
454
11.0M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.0M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.0M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.0M
  size_t afv_x = afv_kind & 1;
404
11.0M
  size_t afv_y = afv_kind / 2;
405
11.0M
  float dcs[3] = {};
406
11.0M
  float block00 = coefficients[0];
407
11.0M
  float block01 = coefficients[1];
408
11.0M
  float block10 = coefficients[8];
409
11.0M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.0M
  dcs[1] = (block00 + block10 - block01);
411
11.0M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.0M
  HWY_ALIGN float coeff[4 * 4];
414
11.0M
  coeff[0] = dcs[0];
415
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
416
221M
    for (size_t ix = 0; ix < 4; ix++) {
417
177M
      if (ix == 0 && iy == 0) continue;
418
166M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
166M
    }
420
44.2M
  }
421
11.0M
  HWY_ALIGN float block[4 * 8];
422
11.0M
  AFVIDCT4x4(coeff, block);
423
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
424
221M
    for (size_t ix = 0; ix < 4; ix++) {
425
177M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
177M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
177M
    }
428
44.2M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.0M
  block[0] = dcs[1];
431
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
432
221M
    for (size_t ix = 0; ix < 4; ix++) {
433
177M
      if (ix == 0 && iy == 0) continue;
434
166M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
166M
    }
436
44.2M
  }
437
11.0M
  ComputeScaledIDCT<4, 4>()(
438
11.0M
      block,
439
11.0M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.0M
            pixels_stride),
441
11.0M
      scratch_space);
442
  // IDCT4x8.
443
11.0M
  block[0] = dcs[2];
444
55.3M
  for (size_t iy = 0; iy < 4; iy++) {
445
398M
    for (size_t ix = 0; ix < 8; ix++) {
446
354M
      if (ix == 0 && iy == 0) continue;
447
343M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
343M
    }
449
44.2M
  }
450
11.0M
  ComputeScaledIDCT<4, 8>()(
451
11.0M
      block,
452
11.0M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.0M
      scratch_space);
454
11.0M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
302k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
302k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
302k
  size_t afv_x = afv_kind & 1;
404
302k
  size_t afv_y = afv_kind / 2;
405
302k
  float dcs[3] = {};
406
302k
  float block00 = coefficients[0];
407
302k
  float block01 = coefficients[1];
408
302k
  float block10 = coefficients[8];
409
302k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
302k
  dcs[1] = (block00 + block10 - block01);
411
302k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
302k
  HWY_ALIGN float coeff[4 * 4];
414
302k
  coeff[0] = dcs[0];
415
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.83M
      if (ix == 0 && iy == 0) continue;
418
4.53M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.53M
    }
420
1.20M
  }
421
302k
  HWY_ALIGN float block[4 * 8];
422
302k
  AFVIDCT4x4(coeff, block);
423
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
424
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
425
4.83M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
4.83M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
4.83M
    }
428
1.20M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
302k
  block[0] = dcs[1];
431
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
432
6.04M
    for (size_t ix = 0; ix < 4; ix++) {
433
4.83M
      if (ix == 0 && iy == 0) continue;
434
4.53M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.53M
    }
436
1.20M
  }
437
302k
  ComputeScaledIDCT<4, 4>()(
438
302k
      block,
439
302k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
302k
            pixels_stride),
441
302k
      scratch_space);
442
  // IDCT4x8.
443
302k
  block[0] = dcs[2];
444
1.51M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.8M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.66M
      if (ix == 0 && iy == 0) continue;
447
9.36M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
9.36M
    }
449
1.20M
  }
450
302k
  ComputeScaledIDCT<4, 8>()(
451
302k
      block,
452
302k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
302k
      scratch_space);
454
302k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
168k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
168k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
168k
  size_t afv_x = afv_kind & 1;
404
168k
  size_t afv_y = afv_kind / 2;
405
168k
  float dcs[3] = {};
406
168k
  float block00 = coefficients[0];
407
168k
  float block01 = coefficients[1];
408
168k
  float block10 = coefficients[8];
409
168k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
168k
  dcs[1] = (block00 + block10 - block01);
411
168k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
168k
  HWY_ALIGN float coeff[4 * 4];
414
168k
  coeff[0] = dcs[0];
415
840k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.36M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.68M
      if (ix == 0 && iy == 0) continue;
418
2.52M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.52M
    }
420
672k
  }
421
168k
  HWY_ALIGN float block[4 * 8];
422
168k
  AFVIDCT4x4(coeff, block);
423
840k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.36M
    for (size_t ix = 0; ix < 4; ix++) {
425
2.68M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
2.68M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
2.68M
    }
428
672k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
168k
  block[0] = dcs[1];
431
840k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.36M
    for (size_t ix = 0; ix < 4; ix++) {
433
2.68M
      if (ix == 0 && iy == 0) continue;
434
2.52M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.52M
    }
436
672k
  }
437
168k
  ComputeScaledIDCT<4, 4>()(
438
168k
      block,
439
168k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
168k
            pixels_stride),
441
168k
      scratch_space);
442
  // IDCT4x8.
443
168k
  block[0] = dcs[2];
444
840k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.05M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.37M
      if (ix == 0 && iy == 0) continue;
447
5.20M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
5.20M
    }
449
672k
  }
450
168k
  ComputeScaledIDCT<4, 8>()(
451
168k
      block,
452
168k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
168k
      scratch_space);
454
168k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
206k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
206k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
206k
  size_t afv_x = afv_kind & 1;
404
206k
  size_t afv_y = afv_kind / 2;
405
206k
  float dcs[3] = {};
406
206k
  float block00 = coefficients[0];
407
206k
  float block01 = coefficients[1];
408
206k
  float block10 = coefficients[8];
409
206k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
206k
  dcs[1] = (block00 + block10 - block01);
411
206k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
206k
  HWY_ALIGN float coeff[4 * 4];
414
206k
  coeff[0] = dcs[0];
415
1.03M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.12M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.29M
      if (ix == 0 && iy == 0) continue;
418
3.09M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.09M
    }
420
824k
  }
421
206k
  HWY_ALIGN float block[4 * 8];
422
206k
  AFVIDCT4x4(coeff, block);
423
1.03M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.12M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.29M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.29M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.29M
    }
428
824k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
206k
  block[0] = dcs[1];
431
1.03M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.12M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.29M
      if (ix == 0 && iy == 0) continue;
434
3.09M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.09M
    }
436
824k
  }
437
206k
  ComputeScaledIDCT<4, 4>()(
438
206k
      block,
439
206k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
206k
            pixels_stride),
441
206k
      scratch_space);
442
  // IDCT4x8.
443
206k
  block[0] = dcs[2];
444
1.03M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.42M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.59M
      if (ix == 0 && iy == 0) continue;
447
6.39M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.39M
    }
449
824k
  }
450
206k
  ComputeScaledIDCT<4, 8>()(
451
206k
      block,
452
206k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
206k
      scratch_space);
454
206k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
222k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
222k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
222k
  size_t afv_x = afv_kind & 1;
404
222k
  size_t afv_y = afv_kind / 2;
405
222k
  float dcs[3] = {};
406
222k
  float block00 = coefficients[0];
407
222k
  float block01 = coefficients[1];
408
222k
  float block10 = coefficients[8];
409
222k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
222k
  dcs[1] = (block00 + block10 - block01);
411
222k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
222k
  HWY_ALIGN float coeff[4 * 4];
414
222k
  coeff[0] = dcs[0];
415
1.11M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.45M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.56M
      if (ix == 0 && iy == 0) continue;
418
3.34M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.34M
    }
420
891k
  }
421
222k
  HWY_ALIGN float block[4 * 8];
422
222k
  AFVIDCT4x4(coeff, block);
423
1.11M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.45M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.56M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.56M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.56M
    }
428
891k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
222k
  block[0] = dcs[1];
431
1.11M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.45M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.56M
      if (ix == 0 && iy == 0) continue;
434
3.34M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.34M
    }
436
891k
  }
437
222k
  ComputeScaledIDCT<4, 4>()(
438
222k
      block,
439
222k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
222k
            pixels_stride),
441
222k
      scratch_space);
442
  // IDCT4x8.
443
222k
  block[0] = dcs[2];
444
1.11M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.02M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.13M
      if (ix == 0 && iy == 0) continue;
447
6.91M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.91M
    }
449
891k
  }
450
222k
  ComputeScaledIDCT<4, 8>()(
451
222k
      block,
452
222k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
222k
      scratch_space);
454
222k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
157M
                                        float* scratch_space) {
461
157M
  using Type = AcStrategyType;
462
157M
  switch (strategy) {
463
14.1M
    case Type::IDENTITY: {
464
14.1M
      float dcs[4] = {};
465
14.1M
      float block00 = coefficients[0];
466
14.1M
      float block01 = coefficients[1];
467
14.1M
      float block10 = coefficients[8];
468
14.1M
      float block11 = coefficients[9];
469
14.1M
      dcs[0] = block00 + block01 + block10 + block11;
470
14.1M
      dcs[1] = block00 + block01 - block10 - block11;
471
14.1M
      dcs[2] = block00 - block01 + block10 - block11;
472
14.1M
      dcs[3] = block00 - block01 - block10 + block11;
473
42.4M
      for (size_t y = 0; y < 2; y++) {
474
84.8M
        for (size_t x = 0; x < 2; x++) {
475
56.5M
          float block_dc = dcs[y * 2 + x];
476
56.5M
          float residual_sum = 0;
477
282M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.13G
            for (size_t ix = 0; ix < 4; ix++) {
479
905M
              if (ix == 0 && iy == 0) continue;
480
848M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
848M
            }
482
226M
          }
483
56.5M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
56.5M
              block_dc - residual_sum * (1.0f / 16);
485
282M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.13G
            for (size_t ix = 0; ix < 4; ix++) {
487
905M
              if (ix == 1 && iy == 1) continue;
488
848M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
848M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
848M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
848M
            }
492
226M
          }
493
56.5M
          pixels[y * 4 * pixels_stride + x * 4] =
494
56.5M
              coefficients[(y + 2) * 8 + x + 2] +
495
56.5M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
56.5M
        }
497
28.2M
      }
498
14.1M
      break;
499
0
    }
500
11.5M
    case Type::DCT8X4: {
501
11.5M
      float dcs[2] = {};
502
11.5M
      float block0 = coefficients[0];
503
11.5M
      float block1 = coefficients[8];
504
11.5M
      dcs[0] = block0 + block1;
505
11.5M
      dcs[1] = block0 - block1;
506
34.5M
      for (size_t x = 0; x < 2; x++) {
507
23.0M
        HWY_ALIGN float block[4 * 8];
508
23.0M
        block[0] = dcs[x];
509
115M
        for (size_t iy = 0; iy < 4; iy++) {
510
828M
          for (size_t ix = 0; ix < 8; ix++) {
511
736M
            if (ix == 0 && iy == 0) continue;
512
713M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
713M
          }
514
92.1M
        }
515
23.0M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
23.0M
                                  scratch_space);
517
23.0M
      }
518
11.5M
      break;
519
0
    }
520
11.2M
    case Type::DCT4X8: {
521
11.2M
      float dcs[2] = {};
522
11.2M
      float block0 = coefficients[0];
523
11.2M
      float block1 = coefficients[8];
524
11.2M
      dcs[0] = block0 + block1;
525
11.2M
      dcs[1] = block0 - block1;
526
33.7M
      for (size_t y = 0; y < 2; y++) {
527
22.5M
        HWY_ALIGN float block[4 * 8];
528
22.5M
        block[0] = dcs[y];
529
112M
        for (size_t iy = 0; iy < 4; iy++) {
530
810M
          for (size_t ix = 0; ix < 8; ix++) {
531
720M
            if (ix == 0 && iy == 0) continue;
532
698M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
698M
          }
534
90.1M
        }
535
22.5M
        ComputeScaledIDCT<4, 8>()(
536
22.5M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
22.5M
            scratch_space);
538
22.5M
      }
539
11.2M
      break;
540
0
    }
541
11.0M
    case Type::DCT4X4: {
542
11.0M
      float dcs[4] = {};
543
11.0M
      float block00 = coefficients[0];
544
11.0M
      float block01 = coefficients[1];
545
11.0M
      float block10 = coefficients[8];
546
11.0M
      float block11 = coefficients[9];
547
11.0M
      dcs[0] = block00 + block01 + block10 + block11;
548
11.0M
      dcs[1] = block00 + block01 - block10 - block11;
549
11.0M
      dcs[2] = block00 - block01 + block10 - block11;
550
11.0M
      dcs[3] = block00 - block01 - block10 + block11;
551
33.2M
      for (size_t y = 0; y < 2; y++) {
552
66.4M
        for (size_t x = 0; x < 2; x++) {
553
44.2M
          HWY_ALIGN float block[4 * 4];
554
44.2M
          block[0] = dcs[y * 2 + x];
555
221M
          for (size_t iy = 0; iy < 4; iy++) {
556
885M
            for (size_t ix = 0; ix < 4; ix++) {
557
708M
              if (ix == 0 && iy == 0) continue;
558
664M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
664M
            }
560
177M
          }
561
44.2M
          ComputeScaledIDCT<4, 4>()(
562
44.2M
              block,
563
44.2M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
44.2M
              scratch_space);
565
44.2M
        }
566
22.1M
      }
567
11.0M
      break;
568
0
    }
569
17.0M
    case Type::DCT2X2: {
570
17.0M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
17.0M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
17.0M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
17.0M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
17.0M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
153M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.22G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.08G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.08G
        }
579
136M
      }
580
17.0M
      break;
581
0
    }
582
5.20M
    case Type::DCT16X16: {
583
5.20M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.20M
                                  scratch_space);
585
5.20M
      break;
586
0
    }
587
9.93M
    case Type::DCT16X8: {
588
9.93M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.93M
                                 scratch_space);
590
9.93M
      break;
591
0
    }
592
10.0M
    case Type::DCT8X16: {
593
10.0M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
10.0M
                                 scratch_space);
595
10.0M
      break;
596
0
    }
597
693
    case Type::DCT32X8: {
598
693
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
693
                                 scratch_space);
600
693
      break;
601
0
    }
602
96
    case Type::DCT8X32: {
603
96
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
96
                                 scratch_space);
605
96
      break;
606
0
    }
607
1.94M
    case Type::DCT32X16: {
608
1.94M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.94M
                                  scratch_space);
610
1.94M
      break;
611
0
    }
612
1.92M
    case Type::DCT16X32: {
613
1.92M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.92M
                                  scratch_space);
615
1.92M
      break;
616
0
    }
617
1.16M
    case Type::DCT32X32: {
618
1.16M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.16M
                                  scratch_space);
620
1.16M
      break;
621
0
    }
622
16.0M
    case Type::DCT: {
623
16.0M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
16.0M
                                scratch_space);
625
16.0M
      break;
626
0
    }
627
11.3M
    case Type::AFV0: {
628
11.3M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
11.3M
      break;
630
0
    }
631
11.2M
    case Type::AFV1: {
632
11.2M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
11.2M
      break;
634
0
    }
635
11.2M
    case Type::AFV2: {
636
11.2M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
11.2M
      break;
638
0
    }
639
11.2M
    case Type::AFV3: {
640
11.2M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
11.2M
      break;
642
0
    }
643
595k
    case Type::DCT64X32: {
644
595k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
595k
                                  scratch_space);
646
595k
      break;
647
0
    }
648
373k
    case Type::DCT32X64: {
649
373k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
373k
                                  scratch_space);
651
373k
      break;
652
0
    }
653
252k
    case Type::DCT64X64: {
654
252k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
252k
                                  scratch_space);
656
252k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
157M
  }
689
157M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
139M
                                        float* scratch_space) {
461
139M
  using Type = AcStrategyType;
462
139M
  switch (strategy) {
463
11.0M
    case Type::IDENTITY: {
464
11.0M
      float dcs[4] = {};
465
11.0M
      float block00 = coefficients[0];
466
11.0M
      float block01 = coefficients[1];
467
11.0M
      float block10 = coefficients[8];
468
11.0M
      float block11 = coefficients[9];
469
11.0M
      dcs[0] = block00 + block01 + block10 + block11;
470
11.0M
      dcs[1] = block00 + block01 - block10 - block11;
471
11.0M
      dcs[2] = block00 - block01 + block10 - block11;
472
11.0M
      dcs[3] = block00 - block01 - block10 + block11;
473
33.2M
      for (size_t y = 0; y < 2; y++) {
474
66.4M
        for (size_t x = 0; x < 2; x++) {
475
44.2M
          float block_dc = dcs[y * 2 + x];
476
44.2M
          float residual_sum = 0;
477
221M
          for (size_t iy = 0; iy < 4; iy++) {
478
885M
            for (size_t ix = 0; ix < 4; ix++) {
479
708M
              if (ix == 0 && iy == 0) continue;
480
664M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
664M
            }
482
177M
          }
483
44.2M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
44.2M
              block_dc - residual_sum * (1.0f / 16);
485
221M
          for (size_t iy = 0; iy < 4; iy++) {
486
885M
            for (size_t ix = 0; ix < 4; ix++) {
487
708M
              if (ix == 1 && iy == 1) continue;
488
664M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
664M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
664M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
664M
            }
492
177M
          }
493
44.2M
          pixels[y * 4 * pixels_stride + x * 4] =
494
44.2M
              coefficients[(y + 2) * 8 + x + 2] +
495
44.2M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
44.2M
        }
497
22.1M
      }
498
11.0M
      break;
499
0
    }
500
11.0M
    case Type::DCT8X4: {
501
11.0M
      float dcs[2] = {};
502
11.0M
      float block0 = coefficients[0];
503
11.0M
      float block1 = coefficients[8];
504
11.0M
      dcs[0] = block0 + block1;
505
11.0M
      dcs[1] = block0 - block1;
506
33.2M
      for (size_t x = 0; x < 2; x++) {
507
22.1M
        HWY_ALIGN float block[4 * 8];
508
22.1M
        block[0] = dcs[x];
509
110M
        for (size_t iy = 0; iy < 4; iy++) {
510
796M
          for (size_t ix = 0; ix < 8; ix++) {
511
708M
            if (ix == 0 && iy == 0) continue;
512
686M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
686M
          }
514
88.5M
        }
515
22.1M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
22.1M
                                  scratch_space);
517
22.1M
      }
518
11.0M
      break;
519
0
    }
520
11.0M
    case Type::DCT4X8: {
521
11.0M
      float dcs[2] = {};
522
11.0M
      float block0 = coefficients[0];
523
11.0M
      float block1 = coefficients[8];
524
11.0M
      dcs[0] = block0 + block1;
525
11.0M
      dcs[1] = block0 - block1;
526
33.2M
      for (size_t y = 0; y < 2; y++) {
527
22.1M
        HWY_ALIGN float block[4 * 8];
528
22.1M
        block[0] = dcs[y];
529
110M
        for (size_t iy = 0; iy < 4; iy++) {
530
796M
          for (size_t ix = 0; ix < 8; ix++) {
531
708M
            if (ix == 0 && iy == 0) continue;
532
686M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
686M
          }
534
88.5M
        }
535
22.1M
        ComputeScaledIDCT<4, 8>()(
536
22.1M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
22.1M
            scratch_space);
538
22.1M
      }
539
11.0M
      break;
540
0
    }
541
11.0M
    case Type::DCT4X4: {
542
11.0M
      float dcs[4] = {};
543
11.0M
      float block00 = coefficients[0];
544
11.0M
      float block01 = coefficients[1];
545
11.0M
      float block10 = coefficients[8];
546
11.0M
      float block11 = coefficients[9];
547
11.0M
      dcs[0] = block00 + block01 + block10 + block11;
548
11.0M
      dcs[1] = block00 + block01 - block10 - block11;
549
11.0M
      dcs[2] = block00 - block01 + block10 - block11;
550
11.0M
      dcs[3] = block00 - block01 - block10 + block11;
551
33.2M
      for (size_t y = 0; y < 2; y++) {
552
66.4M
        for (size_t x = 0; x < 2; x++) {
553
44.2M
          HWY_ALIGN float block[4 * 4];
554
44.2M
          block[0] = dcs[y * 2 + x];
555
221M
          for (size_t iy = 0; iy < 4; iy++) {
556
885M
            for (size_t ix = 0; ix < 4; ix++) {
557
708M
              if (ix == 0 && iy == 0) continue;
558
664M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
664M
            }
560
177M
          }
561
44.2M
          ComputeScaledIDCT<4, 4>()(
562
44.2M
              block,
563
44.2M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
44.2M
              scratch_space);
565
44.2M
        }
566
22.1M
      }
567
11.0M
      break;
568
0
    }
569
11.0M
    case Type::DCT2X2: {
570
11.0M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
11.0M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
11.0M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
11.0M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
11.0M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
99.6M
      for (size_t y = 0; y < kBlockDim; y++) {
576
796M
        for (size_t x = 0; x < kBlockDim; x++) {
577
708M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
708M
        }
579
88.5M
      }
580
11.0M
      break;
581
0
    }
582
4.71M
    case Type::DCT16X16: {
583
4.71M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.71M
                                  scratch_space);
585
4.71M
      break;
586
0
    }
587
9.28M
    case Type::DCT16X8: {
588
9.28M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.28M
                                 scratch_space);
590
9.28M
      break;
591
0
    }
592
9.27M
    case Type::DCT8X16: {
593
9.27M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
9.27M
                                 scratch_space);
595
9.27M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
1.79M
    case Type::DCT32X16: {
608
1.79M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.79M
                                  scratch_space);
610
1.79M
      break;
611
0
    }
612
1.77M
    case Type::DCT16X32: {
613
1.77M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.77M
                                  scratch_space);
615
1.77M
      break;
616
0
    }
617
912k
    case Type::DCT32X32: {
618
912k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
912k
                                  scratch_space);
620
912k
      break;
621
0
    }
622
11.0M
    case Type::DCT: {
623
11.0M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
11.0M
                                scratch_space);
625
11.0M
      break;
626
0
    }
627
11.0M
    case Type::AFV0: {
628
11.0M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
11.0M
      break;
630
0
    }
631
11.0M
    case Type::AFV1: {
632
11.0M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
11.0M
      break;
634
0
    }
635
11.0M
    case Type::AFV2: {
636
11.0M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
11.0M
      break;
638
0
    }
639
11.0M
    case Type::AFV3: {
640
11.0M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
11.0M
      break;
642
0
    }
643
556k
    case Type::DCT64X32: {
644
556k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
556k
                                  scratch_space);
646
556k
      break;
647
0
    }
648
359k
    case Type::DCT32X64: {
649
359k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
359k
                                  scratch_space);
651
359k
      break;
652
0
    }
653
153k
    case Type::DCT64X64: {
654
153k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
153k
                                  scratch_space);
656
153k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
139M
  }
689
139M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
18.0M
                                        float* scratch_space) {
461
18.0M
  using Type = AcStrategyType;
462
18.0M
  switch (strategy) {
463
3.07M
    case Type::IDENTITY: {
464
3.07M
      float dcs[4] = {};
465
3.07M
      float block00 = coefficients[0];
466
3.07M
      float block01 = coefficients[1];
467
3.07M
      float block10 = coefficients[8];
468
3.07M
      float block11 = coefficients[9];
469
3.07M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.07M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.07M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.07M
      dcs[3] = block00 - block01 - block10 + block11;
473
9.23M
      for (size_t y = 0; y < 2; y++) {
474
18.4M
        for (size_t x = 0; x < 2; x++) {
475
12.3M
          float block_dc = dcs[y * 2 + x];
476
12.3M
          float residual_sum = 0;
477
61.5M
          for (size_t iy = 0; iy < 4; iy++) {
478
246M
            for (size_t ix = 0; ix < 4; ix++) {
479
196M
              if (ix == 0 && iy == 0) continue;
480
184M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
184M
            }
482
49.2M
          }
483
12.3M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
12.3M
              block_dc - residual_sum * (1.0f / 16);
485
61.5M
          for (size_t iy = 0; iy < 4; iy++) {
486
246M
            for (size_t ix = 0; ix < 4; ix++) {
487
196M
              if (ix == 1 && iy == 1) continue;
488
184M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
184M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
184M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
184M
            }
492
49.2M
          }
493
12.3M
          pixels[y * 4 * pixels_stride + x * 4] =
494
12.3M
              coefficients[(y + 2) * 8 + x + 2] +
495
12.3M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
12.3M
        }
497
6.15M
      }
498
3.07M
      break;
499
0
    }
500
444k
    case Type::DCT8X4: {
501
444k
      float dcs[2] = {};
502
444k
      float block0 = coefficients[0];
503
444k
      float block1 = coefficients[8];
504
444k
      dcs[0] = block0 + block1;
505
444k
      dcs[1] = block0 - block1;
506
1.33M
      for (size_t x = 0; x < 2; x++) {
507
888k
        HWY_ALIGN float block[4 * 8];
508
888k
        block[0] = dcs[x];
509
4.44M
        for (size_t iy = 0; iy < 4; iy++) {
510
31.9M
          for (size_t ix = 0; ix < 8; ix++) {
511
28.4M
            if (ix == 0 && iy == 0) continue;
512
27.5M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
27.5M
          }
514
3.55M
        }
515
888k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
888k
                                  scratch_space);
517
888k
      }
518
444k
      break;
519
0
    }
520
193k
    case Type::DCT4X8: {
521
193k
      float dcs[2] = {};
522
193k
      float block0 = coefficients[0];
523
193k
      float block1 = coefficients[8];
524
193k
      dcs[0] = block0 + block1;
525
193k
      dcs[1] = block0 - block1;
526
580k
      for (size_t y = 0; y < 2; y++) {
527
387k
        HWY_ALIGN float block[4 * 8];
528
387k
        block[0] = dcs[y];
529
1.93M
        for (size_t iy = 0; iy < 4; iy++) {
530
13.9M
          for (size_t ix = 0; ix < 8; ix++) {
531
12.3M
            if (ix == 0 && iy == 0) continue;
532
12.0M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
12.0M
          }
534
1.54M
        }
535
387k
        ComputeScaledIDCT<4, 8>()(
536
387k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
387k
            scratch_space);
538
387k
      }
539
193k
      break;
540
0
    }
541
3.63k
    case Type::DCT4X4: {
542
3.63k
      float dcs[4] = {};
543
3.63k
      float block00 = coefficients[0];
544
3.63k
      float block01 = coefficients[1];
545
3.63k
      float block10 = coefficients[8];
546
3.63k
      float block11 = coefficients[9];
547
3.63k
      dcs[0] = block00 + block01 + block10 + block11;
548
3.63k
      dcs[1] = block00 + block01 - block10 - block11;
549
3.63k
      dcs[2] = block00 - block01 + block10 - block11;
550
3.63k
      dcs[3] = block00 - block01 - block10 + block11;
551
10.9k
      for (size_t y = 0; y < 2; y++) {
552
21.8k
        for (size_t x = 0; x < 2; x++) {
553
14.5k
          HWY_ALIGN float block[4 * 4];
554
14.5k
          block[0] = dcs[y * 2 + x];
555
72.7k
          for (size_t iy = 0; iy < 4; iy++) {
556
290k
            for (size_t ix = 0; ix < 4; ix++) {
557
232k
              if (ix == 0 && iy == 0) continue;
558
218k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
218k
            }
560
58.1k
          }
561
14.5k
          ComputeScaledIDCT<4, 4>()(
562
14.5k
              block,
563
14.5k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
14.5k
              scratch_space);
565
14.5k
        }
566
7.27k
      }
567
3.63k
      break;
568
0
    }
569
5.96M
    case Type::DCT2X2: {
570
5.96M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
5.96M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
5.96M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
5.96M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
5.96M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
53.6M
      for (size_t y = 0; y < kBlockDim; y++) {
576
429M
        for (size_t x = 0; x < kBlockDim; x++) {
577
381M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
381M
        }
579
47.6M
      }
580
5.96M
      break;
581
0
    }
582
485k
    case Type::DCT16X16: {
583
485k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
485k
                                  scratch_space);
585
485k
      break;
586
0
    }
587
650k
    case Type::DCT16X8: {
588
650k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
650k
                                 scratch_space);
590
650k
      break;
591
0
    }
592
726k
    case Type::DCT8X16: {
593
726k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
726k
                                 scratch_space);
595
726k
      break;
596
0
    }
597
693
    case Type::DCT32X8: {
598
693
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
693
                                 scratch_space);
600
693
      break;
601
0
    }
602
96
    case Type::DCT8X32: {
603
96
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
96
                                 scratch_space);
605
96
      break;
606
0
    }
607
140k
    case Type::DCT32X16: {
608
140k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
140k
                                  scratch_space);
610
140k
      break;
611
0
    }
612
145k
    case Type::DCT16X32: {
613
145k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
145k
                                  scratch_space);
615
145k
      break;
616
0
    }
617
252k
    case Type::DCT32X32: {
618
252k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
252k
                                  scratch_space);
620
252k
      break;
621
0
    }
622
4.93M
    case Type::DCT: {
623
4.93M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
4.93M
                                scratch_space);
625
4.93M
      break;
626
0
    }
627
302k
    case Type::AFV0: {
628
302k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
302k
      break;
630
0
    }
631
168k
    case Type::AFV1: {
632
168k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
168k
      break;
634
0
    }
635
206k
    case Type::AFV2: {
636
206k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
206k
      break;
638
0
    }
639
222k
    case Type::AFV3: {
640
222k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
222k
      break;
642
0
    }
643
38.8k
    case Type::DCT64X32: {
644
38.8k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
38.8k
                                  scratch_space);
646
38.8k
      break;
647
0
    }
648
14.6k
    case Type::DCT32X64: {
649
14.6k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
14.6k
                                  scratch_space);
651
14.6k
      break;
652
0
    }
653
98.6k
    case Type::DCT64X64: {
654
98.6k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
98.6k
                                  scratch_space);
656
98.6k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
18.0M
  }
689
18.0M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
18.3M
                                              float* JXL_RESTRICT scratch) {
695
18.3M
  using Type = AcStrategyType;
696
18.3M
  HWY_ALIGN float warm_block[4 * 4];
697
18.3M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
18.3M
  switch (strategy) {
699
650k
    case Type::DCT16X8: {
700
650k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
650k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
650k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
650k
      break;
704
0
    }
705
726k
    case Type::DCT8X16: {
706
726k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
726k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
726k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
726k
      break;
710
0
    }
711
485k
    case Type::DCT16X16: {
712
485k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
485k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
485k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
485k
      break;
716
0
    }
717
693
    case Type::DCT32X8: {
718
693
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
693
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
693
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
693
      break;
722
0
    }
723
96
    case Type::DCT8X32: {
724
96
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
96
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
96
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
96
      break;
728
0
    }
729
140k
    case Type::DCT32X16: {
730
140k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
140k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
140k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
140k
      break;
734
0
    }
735
145k
    case Type::DCT16X32: {
736
145k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
145k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
145k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
145k
      break;
740
0
    }
741
252k
    case Type::DCT32X32: {
742
252k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
252k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
252k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
252k
      break;
746
0
    }
747
38.8k
    case Type::DCT64X32: {
748
38.8k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
38.8k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
38.8k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
38.8k
      break;
752
0
    }
753
14.6k
    case Type::DCT32X64: {
754
14.6k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
14.6k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
14.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
14.6k
      break;
758
0
    }
759
98.6k
    case Type::DCT64X64: {
760
98.6k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
98.6k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
98.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
98.6k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.97M
    case Type::DCT:
806
10.9M
    case Type::DCT2X2:
807
10.9M
    case Type::DCT4X4:
808
11.1M
    case Type::DCT4X8:
809
11.5M
    case Type::DCT8X4:
810
11.8M
    case Type::AFV0:
811
12.0M
    case Type::AFV1:
812
12.2M
    case Type::AFV2:
813
12.4M
    case Type::AFV3:
814
15.7M
    case Type::IDENTITY:
815
15.7M
      llf[0] = dc[0];
816
15.7M
      break;
817
18.3M
  };
818
18.3M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
18.3M
                                              float* JXL_RESTRICT scratch) {
695
18.3M
  using Type = AcStrategyType;
696
18.3M
  HWY_ALIGN float warm_block[4 * 4];
697
18.3M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
18.3M
  switch (strategy) {
699
650k
    case Type::DCT16X8: {
700
650k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
650k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
650k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
650k
      break;
704
0
    }
705
726k
    case Type::DCT8X16: {
706
726k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
726k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
726k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
726k
      break;
710
0
    }
711
485k
    case Type::DCT16X16: {
712
485k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
485k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
485k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
485k
      break;
716
0
    }
717
693
    case Type::DCT32X8: {
718
693
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
693
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
693
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
693
      break;
722
0
    }
723
96
    case Type::DCT8X32: {
724
96
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
96
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
96
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
96
      break;
728
0
    }
729
140k
    case Type::DCT32X16: {
730
140k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
140k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
140k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
140k
      break;
734
0
    }
735
145k
    case Type::DCT16X32: {
736
145k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
145k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
145k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
145k
      break;
740
0
    }
741
252k
    case Type::DCT32X32: {
742
252k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
252k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
252k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
252k
      break;
746
0
    }
747
38.8k
    case Type::DCT64X32: {
748
38.8k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
38.8k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
38.8k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
38.8k
      break;
752
0
    }
753
14.6k
    case Type::DCT32X64: {
754
14.6k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
14.6k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
14.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
14.6k
      break;
758
0
    }
759
98.6k
    case Type::DCT64X64: {
760
98.6k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
98.6k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
98.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
98.6k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.97M
    case Type::DCT:
806
10.9M
    case Type::DCT2X2:
807
10.9M
    case Type::DCT4X4:
808
11.1M
    case Type::DCT4X8:
809
11.5M
    case Type::DCT8X4:
810
11.8M
    case Type::AFV0:
811
12.0M
    case Type::AFV1:
812
12.2M
    case Type::AFV2:
813
12.4M
    case Type::AFV3:
814
15.7M
    case Type::IDENTITY:
815
15.7M
      llf[0] = dc[0];
816
15.7M
      break;
817
18.3M
  };
818
18.3M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_