Coverage Report

Created: 2026-02-14 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized rectangle in output,
33
// which is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on
34
// the input block.
//
// `input` is read through DCTFrom(input, input_stride). The ROWS*COLS DCT
// result is written to `block`, with `scratch_space` passed through to
// ComputeScaledDCT. Each coefficient copied to `output` (addressed with
// `output_stride`) is multiplied by one DCTTotalResampleScale factor per
// dimension. The static_asserts restrict callers to LF_ROWS == ROWS and
// LF_COLS == COLS.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.46M
                                  float* JXL_RESTRICT scratch_space) {
41
2.46M
  static_assert(LF_ROWS == ROWS,
42
2.46M
                "ReinterpretingDCT should only be called with LF == N");
43
2.46M
  static_assert(LF_COLS == COLS,
44
2.46M
                "ReinterpretingDCT should only be called with LF == N");
45
2.46M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.46M
                                 scratch_space);  // DCT result lands in block.
47
2.46M
  if (ROWS < COLS) {  // block is read with a row stride of COLS.
48
1.84M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
3.94M
      for (size_t x = 0; x < LF_COLS; x++) {
50
2.92M
        output[y * output_stride + x] =
51
2.92M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
2.92M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
2.92M
      }
54
1.01M
    }
55
1.63M
  } else {  // ROWS >= COLS: loop bounds, block stride and scales are swapped.
    // NOTE(review): this suggests block holds the result transposed in this
    // case -- confirm against ComputeScaledDCT in dct-inl.h.
56
5.69M
    for (size_t y = 0; y < LF_COLS; y++) {
57
22.5M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
18.5M
        output[y * output_stride + x] =
59
18.5M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
18.5M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
18.5M
      }
62
4.06M
    }
63
1.63M
  }
64
2.46M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
628k
                                  float* JXL_RESTRICT scratch_space) {
41
628k
  static_assert(LF_ROWS == ROWS,
42
628k
                "ReinterpretingDCT should only be called with LF == N");
43
628k
  static_assert(LF_COLS == COLS,
44
628k
                "ReinterpretingDCT should only be called with LF == N");
45
628k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
628k
                                 scratch_space);
47
628k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
628k
  } else {
56
1.25M
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.88M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.25M
        output[y * output_stride + x] =
59
1.25M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.25M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.25M
      }
62
628k
    }
63
628k
  }
64
628k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
683k
                                  float* JXL_RESTRICT scratch_space) {
41
683k
  static_assert(LF_ROWS == ROWS,
42
683k
                "ReinterpretingDCT should only be called with LF == N");
43
683k
  static_assert(LF_COLS == COLS,
44
683k
                "ReinterpretingDCT should only be called with LF == N");
45
683k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
683k
                                 scratch_space);
47
683k
  if (ROWS < COLS) {
48
1.36M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.05M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.36M
        output[y * output_stride + x] =
51
1.36M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.36M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.36M
      }
54
683k
    }
55
683k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
683k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
445k
                                  float* JXL_RESTRICT scratch_space) {
41
445k
  static_assert(LF_ROWS == ROWS,
42
445k
                "ReinterpretingDCT should only be called with LF == N");
43
445k
  static_assert(LF_COLS == COLS,
44
445k
                "ReinterpretingDCT should only be called with LF == N");
45
445k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
445k
                                 scratch_space);
47
445k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
445k
  } else {
56
1.33M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.67M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.78M
        output[y * output_stride + x] =
59
1.78M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.78M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.78M
      }
62
890k
    }
63
445k
  }
64
445k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
708
                                  float* JXL_RESTRICT scratch_space) {
41
708
  static_assert(LF_ROWS == ROWS,
42
708
                "ReinterpretingDCT should only be called with LF == N");
43
708
  static_assert(LF_COLS == COLS,
44
708
                "ReinterpretingDCT should only be called with LF == N");
45
708
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
708
                                 scratch_space);
47
708
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
708
  } else {
56
1.41k
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.54k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.83k
        output[y * output_stride + x] =
59
2.83k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.83k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.83k
      }
62
708
    }
63
708
  }
64
708
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
117
                                  float* JXL_RESTRICT scratch_space) {
41
117
  static_assert(LF_ROWS == ROWS,
42
117
                "ReinterpretingDCT should only be called with LF == N");
43
117
  static_assert(LF_COLS == COLS,
44
117
                "ReinterpretingDCT should only be called with LF == N");
45
117
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
117
                                 scratch_space);
47
117
  if (ROWS < COLS) {
48
234
    for (size_t y = 0; y < LF_ROWS; y++) {
49
585
      for (size_t x = 0; x < LF_COLS; x++) {
50
468
        output[y * output_stride + x] =
51
468
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
468
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
468
      }
54
117
    }
55
117
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
117
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
136k
                                  float* JXL_RESTRICT scratch_space) {
41
136k
  static_assert(LF_ROWS == ROWS,
42
136k
                "ReinterpretingDCT should only be called with LF == N");
43
136k
  static_assert(LF_COLS == COLS,
44
136k
                "ReinterpretingDCT should only be called with LF == N");
45
136k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
136k
                                 scratch_space);
47
136k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
136k
  } else {
56
410k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.36M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.09M
        output[y * output_stride + x] =
59
1.09M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.09M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.09M
      }
62
273k
    }
63
136k
  }
64
136k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
133k
                                  float* JXL_RESTRICT scratch_space) {
41
133k
  static_assert(LF_ROWS == ROWS,
42
133k
                "ReinterpretingDCT should only be called with LF == N");
43
133k
  static_assert(LF_COLS == COLS,
44
133k
                "ReinterpretingDCT should only be called with LF == N");
45
133k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
133k
                                 scratch_space);
47
133k
  if (ROWS < COLS) {
48
400k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.33M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.06M
        output[y * output_stride + x] =
51
1.06M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.06M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.06M
      }
54
267k
    }
55
133k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
133k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
235k
                                  float* JXL_RESTRICT scratch_space) {
41
235k
  static_assert(LF_ROWS == ROWS,
42
235k
                "ReinterpretingDCT should only be called with LF == N");
43
235k
  static_assert(LF_COLS == COLS,
44
235k
                "ReinterpretingDCT should only be called with LF == N");
45
235k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
235k
                                 scratch_space);
47
235k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
235k
  } else {
56
1.17M
    for (size_t y = 0; y < LF_COLS; y++) {
57
4.71M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
3.77M
        output[y * output_stride + x] =
59
3.77M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
3.77M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
3.77M
      }
62
943k
    }
63
235k
  }
64
235k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
36.2k
                                  float* JXL_RESTRICT scratch_space) {
41
36.2k
  static_assert(LF_ROWS == ROWS,
42
36.2k
                "ReinterpretingDCT should only be called with LF == N");
43
36.2k
  static_assert(LF_COLS == COLS,
44
36.2k
                "ReinterpretingDCT should only be called with LF == N");
45
36.2k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
36.2k
                                 scratch_space);
47
36.2k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
36.2k
  } else {
56
181k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.30M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.15M
        output[y * output_stride + x] =
59
1.15M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.15M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.15M
      }
62
144k
    }
63
36.2k
  }
64
36.2k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
15.3k
                                  float* JXL_RESTRICT scratch_space) {
41
15.3k
  static_assert(LF_ROWS == ROWS,
42
15.3k
                "ReinterpretingDCT should only be called with LF == N");
43
15.3k
  static_assert(LF_COLS == COLS,
44
15.3k
                "ReinterpretingDCT should only be called with LF == N");
45
15.3k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
15.3k
                                 scratch_space);
47
15.3k
  if (ROWS < COLS) {
48
76.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
552k
      for (size_t x = 0; x < LF_COLS; x++) {
50
491k
        output[y * output_stride + x] =
51
491k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
491k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
491k
      }
54
61.4k
    }
55
15.3k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
15.3k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
147k
                                  float* JXL_RESTRICT scratch_space) {
41
147k
  static_assert(LF_ROWS == ROWS,
42
147k
                "ReinterpretingDCT should only be called with LF == N");
43
147k
  static_assert(LF_COLS == COLS,
44
147k
                "ReinterpretingDCT should only be called with LF == N");
45
147k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
147k
                                 scratch_space);
47
147k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
147k
  } else {
56
1.32M
    for (size_t y = 0; y < LF_COLS; y++) {
57
10.6M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
9.44M
        output[y * output_stride + x] =
59
9.44M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
9.44M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
9.44M
      }
62
1.18M
    }
63
147k
  }
64
147k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6
                                  float* JXL_RESTRICT scratch_space) {
41
6
  static_assert(LF_ROWS == ROWS,
42
6
                "ReinterpretingDCT should only be called with LF == N");
43
6
  static_assert(LF_COLS == COLS,
44
6
                "ReinterpretingDCT should only be called with LF == N");
45
6
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6
                                 scratch_space);
47
6
  if (ROWS < COLS) {
48
54
    for (size_t y = 0; y < LF_ROWS; y++) {
49
816
      for (size_t x = 0; x < LF_COLS; x++) {
50
768
        output[y * output_stride + x] =
51
768
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
768
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
768
      }
54
48
    }
55
6
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
6
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
18
                                  float* JXL_RESTRICT scratch_space) {
41
18
  static_assert(LF_ROWS == ROWS,
42
18
                "ReinterpretingDCT should only be called with LF == N");
43
18
  static_assert(LF_COLS == COLS,
44
18
                "ReinterpretingDCT should only be called with LF == N");
45
18
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
18
                                 scratch_space);
47
18
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
18
  } else {
56
306
    for (size_t y = 0; y < LF_COLS; y++) {
57
4.89k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.60k
        output[y * output_stride + x] =
59
4.60k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.60k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.60k
      }
62
288
    }
63
18
  }
64
18
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
51.2M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
51.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
51.2M
  static_assert(S % 2 == 0, "S should be even");
70
51.2M
  float temp[kDCTBlockSize];
71
51.2M
  constexpr size_t num_2x2 = S / 2;
72
170M
  for (size_t y = 0; y < num_2x2; y++) {
73
477M
    for (size_t x = 0; x < num_2x2; x++) {
74
358M
      float c00 = block[y * kBlockDim + x];
75
358M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
358M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
358M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
358M
      float r00 = c00 + c01 + c10 + c11;
79
358M
      float r01 = c00 + c01 - c10 - c11;
80
358M
      float r10 = c00 - c01 + c10 - c11;
81
358M
      float r11 = c00 - c01 - c10 + c11;
82
358M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
358M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
358M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
358M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
358M
    }
87
119M
  }
88
290M
  for (size_t y = 0; y < S; y++) {
89
1.67G
    for (size_t x = 0; x < S; x++) {
90
1.43G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.43G
    }
92
238M
  }
93
51.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
22.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
11.4M
      float c00 = block[y * kBlockDim + x];
75
11.4M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
11.4M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
11.4M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
11.4M
      float r00 = c00 + c01 + c10 + c11;
79
11.4M
      float r01 = c00 + c01 - c10 - c11;
80
11.4M
      float r10 = c00 - c01 + c10 - c11;
81
11.4M
      float r11 = c00 - c01 - c10 + c11;
82
11.4M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
11.4M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
11.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
11.4M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
11.4M
    }
87
11.4M
  }
88
34.4M
  for (size_t y = 0; y < S; y++) {
89
68.9M
    for (size_t x = 0; x < S; x++) {
90
45.9M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
45.9M
    }
92
22.9M
  }
93
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
34.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
68.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
45.9M
      float c00 = block[y * kBlockDim + x];
75
45.9M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
45.9M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
45.9M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
45.9M
      float r00 = c00 + c01 + c10 + c11;
79
45.9M
      float r01 = c00 + c01 - c10 - c11;
80
45.9M
      float r10 = c00 - c01 + c10 - c11;
81
45.9M
      float r11 = c00 - c01 - c10 + c11;
82
45.9M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
45.9M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
45.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
45.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
45.9M
    }
87
22.9M
  }
88
57.4M
  for (size_t y = 0; y < S; y++) {
89
229M
    for (size_t x = 0; x < S; x++) {
90
183M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
183M
    }
92
45.9M
  }
93
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
57.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
229M
    for (size_t x = 0; x < num_2x2; x++) {
74
183M
      float c00 = block[y * kBlockDim + x];
75
183M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
183M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
183M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
183M
      float r00 = c00 + c01 + c10 + c11;
79
183M
      float r01 = c00 + c01 - c10 - c11;
80
183M
      float r10 = c00 - c01 + c10 - c11;
81
183M
      float r11 = c00 - c01 - c10 + c11;
82
183M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
183M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
183M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
183M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
183M
    }
87
45.9M
  }
88
103M
  for (size_t y = 0; y < S; y++) {
89
827M
    for (size_t x = 0; x < S; x++) {
90
735M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
735M
    }
92
91.9M
  }
93
11.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.57M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.57M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.57M
  static_assert(S % 2 == 0, "S should be even");
70
5.57M
  float temp[kDCTBlockSize];
71
5.57M
  constexpr size_t num_2x2 = S / 2;
72
11.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
5.57M
      float c00 = block[y * kBlockDim + x];
75
5.57M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
5.57M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
5.57M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
5.57M
      float r00 = c00 + c01 + c10 + c11;
79
5.57M
      float r01 = c00 + c01 - c10 - c11;
80
5.57M
      float r10 = c00 - c01 + c10 - c11;
81
5.57M
      float r11 = c00 - c01 - c10 + c11;
82
5.57M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
5.57M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
5.57M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
5.57M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
5.57M
    }
87
5.57M
  }
88
16.7M
  for (size_t y = 0; y < S; y++) {
89
33.4M
    for (size_t x = 0; x < S; x++) {
90
22.2M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
22.2M
    }
92
11.1M
  }
93
5.57M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.57M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.57M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.57M
  static_assert(S % 2 == 0, "S should be even");
70
5.57M
  float temp[kDCTBlockSize];
71
5.57M
  constexpr size_t num_2x2 = S / 2;
72
16.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
33.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
22.2M
      float c00 = block[y * kBlockDim + x];
75
22.2M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
22.2M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
22.2M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
22.2M
      float r00 = c00 + c01 + c10 + c11;
79
22.2M
      float r01 = c00 + c01 - c10 - c11;
80
22.2M
      float r10 = c00 - c01 + c10 - c11;
81
22.2M
      float r11 = c00 - c01 - c10 + c11;
82
22.2M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
22.2M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
22.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
22.2M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
22.2M
    }
87
11.1M
  }
88
27.8M
  for (size_t y = 0; y < S; y++) {
89
111M
    for (size_t x = 0; x < S; x++) {
90
89.1M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
89.1M
    }
92
22.2M
  }
93
5.57M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.57M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.57M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.57M
  static_assert(S % 2 == 0, "S should be even");
70
5.57M
  float temp[kDCTBlockSize];
71
5.57M
  constexpr size_t num_2x2 = S / 2;
72
27.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
111M
    for (size_t x = 0; x < num_2x2; x++) {
74
89.1M
      float c00 = block[y * kBlockDim + x];
75
89.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
89.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
89.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
89.1M
      float r00 = c00 + c01 + c10 + c11;
79
89.1M
      float r01 = c00 + c01 - c10 - c11;
80
89.1M
      float r10 = c00 - c01 + c10 - c11;
81
89.1M
      float r11 = c00 - c01 - c10 + c11;
82
89.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
89.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
89.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
89.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
89.1M
    }
87
22.2M
  }
88
50.1M
  for (size_t y = 0; y < S; y++) {
89
401M
    for (size_t x = 0; x < S; x++) {
90
356M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
356M
    }
92
44.5M
  }
93
5.57M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
46.8M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
46.8M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
46.8M
      {
98
46.8M
          0.25,
99
46.8M
          0.25,
100
46.8M
          0.25,
101
46.8M
          0.25,
102
46.8M
          0.25,
103
46.8M
          0.25,
104
46.8M
          0.25,
105
46.8M
          0.25,
106
46.8M
          0.25,
107
46.8M
          0.25,
108
46.8M
          0.25,
109
46.8M
          0.25,
110
46.8M
          0.25,
111
46.8M
          0.25,
112
46.8M
          0.25,
113
46.8M
          0.25,
114
46.8M
      },
115
46.8M
      {
116
46.8M
          0.876902929799142f,
117
46.8M
          0.2206518106944235f,
118
46.8M
          -0.10140050393753763f,
119
46.8M
          -0.1014005039375375f,
120
46.8M
          0.2206518106944236f,
121
46.8M
          -0.10140050393753777f,
122
46.8M
          -0.10140050393753772f,
123
46.8M
          -0.10140050393753763f,
124
46.8M
          -0.10140050393753758f,
125
46.8M
          -0.10140050393753769f,
126
46.8M
          -0.1014005039375375f,
127
46.8M
          -0.10140050393753768f,
128
46.8M
          -0.10140050393753768f,
129
46.8M
          -0.10140050393753759f,
130
46.8M
          -0.10140050393753763f,
131
46.8M
          -0.10140050393753741f,
132
46.8M
      },
133
46.8M
      {
134
46.8M
          0.0,
135
46.8M
          0.0,
136
46.8M
          0.40670075830260755f,
137
46.8M
          0.44444816619734445f,
138
46.8M
          0.0,
139
46.8M
          0.0,
140
46.8M
          0.19574399372042936f,
141
46.8M
          0.2929100136981264f,
142
46.8M
          -0.40670075830260716f,
143
46.8M
          -0.19574399372042872f,
144
46.8M
          0.0,
145
46.8M
          0.11379074460448091f,
146
46.8M
          -0.44444816619734384f,
147
46.8M
          -0.29291001369812636f,
148
46.8M
          -0.1137907446044814f,
149
46.8M
          0.0,
150
46.8M
      },
151
46.8M
      {
152
46.8M
          0.0,
153
46.8M
          0.0,
154
46.8M
          -0.21255748058288748f,
155
46.8M
          0.3085497062849767f,
156
46.8M
          0.0,
157
46.8M
          0.4706702258572536f,
158
46.8M
          -0.1621205195722993f,
159
46.8M
          0.0,
160
46.8M
          -0.21255748058287047f,
161
46.8M
          -0.16212051957228327f,
162
46.8M
          -0.47067022585725277f,
163
46.8M
          -0.1464291867126764f,
164
46.8M
          0.3085497062849487f,
165
46.8M
          0.0,
166
46.8M
          -0.14642918671266536f,
167
46.8M
          0.4251149611657548f,
168
46.8M
      },
169
46.8M
      {
170
46.8M
          0.0,
171
46.8M
          -0.7071067811865474f,
172
46.8M
          0.0,
173
46.8M
          0.0,
174
46.8M
          0.7071067811865476f,
175
46.8M
          0.0,
176
46.8M
          0.0,
177
46.8M
          0.0,
178
46.8M
          0.0,
179
46.8M
          0.0,
180
46.8M
          0.0,
181
46.8M
          0.0,
182
46.8M
          0.0,
183
46.8M
          0.0,
184
46.8M
          0.0,
185
46.8M
          0.0,
186
46.8M
      },
187
46.8M
      {
188
46.8M
          -0.4105377591765233f,
189
46.8M
          0.6235485373547691f,
190
46.8M
          -0.06435071657946274f,
191
46.8M
          -0.06435071657946266f,
192
46.8M
          0.6235485373547694f,
193
46.8M
          -0.06435071657946284f,
194
46.8M
          -0.0643507165794628f,
195
46.8M
          -0.06435071657946274f,
196
46.8M
          -0.06435071657946272f,
197
46.8M
          -0.06435071657946279f,
198
46.8M
          -0.06435071657946266f,
199
46.8M
          -0.06435071657946277f,
200
46.8M
          -0.06435071657946277f,
201
46.8M
          -0.06435071657946273f,
202
46.8M
          -0.06435071657946274f,
203
46.8M
          -0.0643507165794626f,
204
46.8M
      },
205
46.8M
      {
206
46.8M
          0.0,
207
46.8M
          0.0,
208
46.8M
          -0.4517556589999482f,
209
46.8M
          0.15854503551840063f,
210
46.8M
          0.0,
211
46.8M
          -0.04038515160822202f,
212
46.8M
          0.0074182263792423875f,
213
46.8M
          0.39351034269210167f,
214
46.8M
          -0.45175565899994635f,
215
46.8M
          0.007418226379244351f,
216
46.8M
          0.1107416575309343f,
217
46.8M
          0.08298163094882051f,
218
46.8M
          0.15854503551839705f,
219
46.8M
          0.3935103426921022f,
220
46.8M
          0.0829816309488214f,
221
46.8M
          -0.45175565899994796f,
222
46.8M
      },
223
46.8M
      {
224
46.8M
          0.0,
225
46.8M
          0.0,
226
46.8M
          -0.304684750724869f,
227
46.8M
          0.5112616136591823f,
228
46.8M
          0.0,
229
46.8M
          0.0,
230
46.8M
          -0.290480129728998f,
231
46.8M
          -0.06578701549142804f,
232
46.8M
          0.304684750724884f,
233
46.8M
          0.2904801297290076f,
234
46.8M
          0.0,
235
46.8M
          -0.23889773523344604f,
236
46.8M
          -0.5112616136592012f,
237
46.8M
          0.06578701549142545f,
238
46.8M
          0.23889773523345467f,
239
46.8M
          0.0,
240
46.8M
      },
241
46.8M
      {
242
46.8M
          0.0,
243
46.8M
          0.0,
244
46.8M
          0.3017929516615495f,
245
46.8M
          0.25792362796341184f,
246
46.8M
          0.0,
247
46.8M
          0.16272340142866204f,
248
46.8M
          0.09520022653475037f,
249
46.8M
          0.0,
250
46.8M
          0.3017929516615503f,
251
46.8M
          0.09520022653475055f,
252
46.8M
          -0.16272340142866173f,
253
46.8M
          -0.35312385449816297f,
254
46.8M
          0.25792362796341295f,
255
46.8M
          0.0,
256
46.8M
          -0.3531238544981624f,
257
46.8M
          -0.6035859033230976f,
258
46.8M
      },
259
46.8M
      {
260
46.8M
          0.0,
261
46.8M
          0.0,
262
46.8M
          0.40824829046386274f,
263
46.8M
          0.0,
264
46.8M
          0.0,
265
46.8M
          0.0,
266
46.8M
          0.0,
267
46.8M
          -0.4082482904638628f,
268
46.8M
          -0.4082482904638635f,
269
46.8M
          0.0,
270
46.8M
          0.0,
271
46.8M
          -0.40824829046386296f,
272
46.8M
          0.0,
273
46.8M
          0.4082482904638634f,
274
46.8M
          0.408248290463863f,
275
46.8M
          0.0,
276
46.8M
      },
277
46.8M
      {
278
46.8M
          0.0,
279
46.8M
          0.0,
280
46.8M
          0.1747866975480809f,
281
46.8M
          0.0812611176717539f,
282
46.8M
          0.0,
283
46.8M
          0.0,
284
46.8M
          -0.3675398009862027f,
285
46.8M
          -0.307882213957909f,
286
46.8M
          -0.17478669754808135f,
287
46.8M
          0.3675398009862011f,
288
46.8M
          0.0,
289
46.8M
          0.4826689115059883f,
290
46.8M
          -0.08126111767175039f,
291
46.8M
          0.30788221395790305f,
292
46.8M
          -0.48266891150598584f,
293
46.8M
          0.0,
294
46.8M
      },
295
46.8M
      {
296
46.8M
          0.0,
297
46.8M
          0.0,
298
46.8M
          -0.21105601049335784f,
299
46.8M
          0.18567180916109802f,
300
46.8M
          0.0,
301
46.8M
          0.0,
302
46.8M
          0.49215859013738733f,
303
46.8M
          -0.38525013709251915f,
304
46.8M
          0.21105601049335806f,
305
46.8M
          -0.49215859013738905f,
306
46.8M
          0.0,
307
46.8M
          0.17419412659916217f,
308
46.8M
          -0.18567180916109904f,
309
46.8M
          0.3852501370925211f,
310
46.8M
          -0.1741941265991621f,
311
46.8M
          0.0,
312
46.8M
      },
313
46.8M
      {
314
46.8M
          0.0,
315
46.8M
          0.0,
316
46.8M
          -0.14266084808807264f,
317
46.8M
          -0.3416446842253372f,
318
46.8M
          0.0,
319
46.8M
          0.7367497537172237f,
320
46.8M
          0.24627107722075148f,
321
46.8M
          -0.08574019035519306f,
322
46.8M
          -0.14266084808807344f,
323
46.8M
          0.24627107722075137f,
324
46.8M
          0.14883399227113567f,
325
46.8M
          -0.04768680350229251f,
326
46.8M
          -0.3416446842253373f,
327
46.8M
          -0.08574019035519267f,
328
46.8M
          -0.047686803502292804f,
329
46.8M
          -0.14266084808807242f,
330
46.8M
      },
331
46.8M
      {
332
46.8M
          0.0,
333
46.8M
          0.0,
334
46.8M
          -0.13813540350758585f,
335
46.8M
          0.3302282550303788f,
336
46.8M
          0.0,
337
46.8M
          0.08755115000587084f,
338
46.8M
          -0.07946706605909573f,
339
46.8M
          -0.4613374887461511f,
340
46.8M
          -0.13813540350758294f,
341
46.8M
          -0.07946706605910261f,
342
46.8M
          0.49724647109535086f,
343
46.8M
          0.12538059448563663f,
344
46.8M
          0.3302282550303805f,
345
46.8M
          -0.4613374887461554f,
346
46.8M
          0.12538059448564315f,
347
46.8M
          -0.13813540350758452f,
348
46.8M
      },
349
46.8M
      {
350
46.8M
          0.0,
351
46.8M
          0.0,
352
46.8M
          -0.17437602599651067f,
353
46.8M
          0.0702790691196284f,
354
46.8M
          0.0,
355
46.8M
          -0.2921026642334881f,
356
46.8M
          0.3623817333531167f,
357
46.8M
          0.0,
358
46.8M
          -0.1743760259965108f,
359
46.8M
          0.36238173335311646f,
360
46.8M
          0.29210266423348785f,
361
46.8M
          -0.4326608024727445f,
362
46.8M
          0.07027906911962818f,
363
46.8M
          0.0,
364
46.8M
          -0.4326608024727457f,
365
46.8M
          0.34875205199302267f,
366
46.8M
      },
367
46.8M
      {
368
46.8M
          0.0,
369
46.8M
          0.0,
370
46.8M
          0.11354987314994337f,
371
46.8M
          -0.07417504595810355f,
372
46.8M
          0.0,
373
46.8M
          0.19402893032594343f,
374
46.8M
          -0.435190496523228f,
375
46.8M
          0.21918684838857466f,
376
46.8M
          0.11354987314994257f,
377
46.8M
          -0.4351904965232251f,
378
46.8M
          0.5550443808910661f,
379
46.8M
          -0.25468277124066463f,
380
46.8M
          -0.07417504595810233f,
381
46.8M
          0.2191868483885728f,
382
46.8M
          -0.25468277124066413f,
383
46.8M
          0.1135498731499429f,
384
46.8M
      },
385
46.8M
  };
386
387
46.8M
  const HWY_CAPPED(float, 16) d;
388
140M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
93.6M
    auto pixel = Zero(d);
390
1.59G
    for (size_t j = 0; j < 16; j++) {
391
1.49G
      auto cf = Set(d, coeffs[j]);
392
1.49G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.49G
      pixel = MulAdd(cf, basis, pixel);
394
1.49G
    }
395
93.6M
    Store(pixel, d, pixels + i);
396
93.6M
  }
397
46.8M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
45.9M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
45.9M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
45.9M
      {
98
45.9M
          0.25,
99
45.9M
          0.25,
100
45.9M
          0.25,
101
45.9M
          0.25,
102
45.9M
          0.25,
103
45.9M
          0.25,
104
45.9M
          0.25,
105
45.9M
          0.25,
106
45.9M
          0.25,
107
45.9M
          0.25,
108
45.9M
          0.25,
109
45.9M
          0.25,
110
45.9M
          0.25,
111
45.9M
          0.25,
112
45.9M
          0.25,
113
45.9M
          0.25,
114
45.9M
      },
115
45.9M
      {
116
45.9M
          0.876902929799142f,
117
45.9M
          0.2206518106944235f,
118
45.9M
          -0.10140050393753763f,
119
45.9M
          -0.1014005039375375f,
120
45.9M
          0.2206518106944236f,
121
45.9M
          -0.10140050393753777f,
122
45.9M
          -0.10140050393753772f,
123
45.9M
          -0.10140050393753763f,
124
45.9M
          -0.10140050393753758f,
125
45.9M
          -0.10140050393753769f,
126
45.9M
          -0.1014005039375375f,
127
45.9M
          -0.10140050393753768f,
128
45.9M
          -0.10140050393753768f,
129
45.9M
          -0.10140050393753759f,
130
45.9M
          -0.10140050393753763f,
131
45.9M
          -0.10140050393753741f,
132
45.9M
      },
133
45.9M
      {
134
45.9M
          0.0,
135
45.9M
          0.0,
136
45.9M
          0.40670075830260755f,
137
45.9M
          0.44444816619734445f,
138
45.9M
          0.0,
139
45.9M
          0.0,
140
45.9M
          0.19574399372042936f,
141
45.9M
          0.2929100136981264f,
142
45.9M
          -0.40670075830260716f,
143
45.9M
          -0.19574399372042872f,
144
45.9M
          0.0,
145
45.9M
          0.11379074460448091f,
146
45.9M
          -0.44444816619734384f,
147
45.9M
          -0.29291001369812636f,
148
45.9M
          -0.1137907446044814f,
149
45.9M
          0.0,
150
45.9M
      },
151
45.9M
      {
152
45.9M
          0.0,
153
45.9M
          0.0,
154
45.9M
          -0.21255748058288748f,
155
45.9M
          0.3085497062849767f,
156
45.9M
          0.0,
157
45.9M
          0.4706702258572536f,
158
45.9M
          -0.1621205195722993f,
159
45.9M
          0.0,
160
45.9M
          -0.21255748058287047f,
161
45.9M
          -0.16212051957228327f,
162
45.9M
          -0.47067022585725277f,
163
45.9M
          -0.1464291867126764f,
164
45.9M
          0.3085497062849487f,
165
45.9M
          0.0,
166
45.9M
          -0.14642918671266536f,
167
45.9M
          0.4251149611657548f,
168
45.9M
      },
169
45.9M
      {
170
45.9M
          0.0,
171
45.9M
          -0.7071067811865474f,
172
45.9M
          0.0,
173
45.9M
          0.0,
174
45.9M
          0.7071067811865476f,
175
45.9M
          0.0,
176
45.9M
          0.0,
177
45.9M
          0.0,
178
45.9M
          0.0,
179
45.9M
          0.0,
180
45.9M
          0.0,
181
45.9M
          0.0,
182
45.9M
          0.0,
183
45.9M
          0.0,
184
45.9M
          0.0,
185
45.9M
          0.0,
186
45.9M
      },
187
45.9M
      {
188
45.9M
          -0.4105377591765233f,
189
45.9M
          0.6235485373547691f,
190
45.9M
          -0.06435071657946274f,
191
45.9M
          -0.06435071657946266f,
192
45.9M
          0.6235485373547694f,
193
45.9M
          -0.06435071657946284f,
194
45.9M
          -0.0643507165794628f,
195
45.9M
          -0.06435071657946274f,
196
45.9M
          -0.06435071657946272f,
197
45.9M
          -0.06435071657946279f,
198
45.9M
          -0.06435071657946266f,
199
45.9M
          -0.06435071657946277f,
200
45.9M
          -0.06435071657946277f,
201
45.9M
          -0.06435071657946273f,
202
45.9M
          -0.06435071657946274f,
203
45.9M
          -0.0643507165794626f,
204
45.9M
      },
205
45.9M
      {
206
45.9M
          0.0,
207
45.9M
          0.0,
208
45.9M
          -0.4517556589999482f,
209
45.9M
          0.15854503551840063f,
210
45.9M
          0.0,
211
45.9M
          -0.04038515160822202f,
212
45.9M
          0.0074182263792423875f,
213
45.9M
          0.39351034269210167f,
214
45.9M
          -0.45175565899994635f,
215
45.9M
          0.007418226379244351f,
216
45.9M
          0.1107416575309343f,
217
45.9M
          0.08298163094882051f,
218
45.9M
          0.15854503551839705f,
219
45.9M
          0.3935103426921022f,
220
45.9M
          0.0829816309488214f,
221
45.9M
          -0.45175565899994796f,
222
45.9M
      },
223
45.9M
      {
224
45.9M
          0.0,
225
45.9M
          0.0,
226
45.9M
          -0.304684750724869f,
227
45.9M
          0.5112616136591823f,
228
45.9M
          0.0,
229
45.9M
          0.0,
230
45.9M
          -0.290480129728998f,
231
45.9M
          -0.06578701549142804f,
232
45.9M
          0.304684750724884f,
233
45.9M
          0.2904801297290076f,
234
45.9M
          0.0,
235
45.9M
          -0.23889773523344604f,
236
45.9M
          -0.5112616136592012f,
237
45.9M
          0.06578701549142545f,
238
45.9M
          0.23889773523345467f,
239
45.9M
          0.0,
240
45.9M
      },
241
45.9M
      {
242
45.9M
          0.0,
243
45.9M
          0.0,
244
45.9M
          0.3017929516615495f,
245
45.9M
          0.25792362796341184f,
246
45.9M
          0.0,
247
45.9M
          0.16272340142866204f,
248
45.9M
          0.09520022653475037f,
249
45.9M
          0.0,
250
45.9M
          0.3017929516615503f,
251
45.9M
          0.09520022653475055f,
252
45.9M
          -0.16272340142866173f,
253
45.9M
          -0.35312385449816297f,
254
45.9M
          0.25792362796341295f,
255
45.9M
          0.0,
256
45.9M
          -0.3531238544981624f,
257
45.9M
          -0.6035859033230976f,
258
45.9M
      },
259
45.9M
      {
260
45.9M
          0.0,
261
45.9M
          0.0,
262
45.9M
          0.40824829046386274f,
263
45.9M
          0.0,
264
45.9M
          0.0,
265
45.9M
          0.0,
266
45.9M
          0.0,
267
45.9M
          -0.4082482904638628f,
268
45.9M
          -0.4082482904638635f,
269
45.9M
          0.0,
270
45.9M
          0.0,
271
45.9M
          -0.40824829046386296f,
272
45.9M
          0.0,
273
45.9M
          0.4082482904638634f,
274
45.9M
          0.408248290463863f,
275
45.9M
          0.0,
276
45.9M
      },
277
45.9M
      {
278
45.9M
          0.0,
279
45.9M
          0.0,
280
45.9M
          0.1747866975480809f,
281
45.9M
          0.0812611176717539f,
282
45.9M
          0.0,
283
45.9M
          0.0,
284
45.9M
          -0.3675398009862027f,
285
45.9M
          -0.307882213957909f,
286
45.9M
          -0.17478669754808135f,
287
45.9M
          0.3675398009862011f,
288
45.9M
          0.0,
289
45.9M
          0.4826689115059883f,
290
45.9M
          -0.08126111767175039f,
291
45.9M
          0.30788221395790305f,
292
45.9M
          -0.48266891150598584f,
293
45.9M
          0.0,
294
45.9M
      },
295
45.9M
      {
296
45.9M
          0.0,
297
45.9M
          0.0,
298
45.9M
          -0.21105601049335784f,
299
45.9M
          0.18567180916109802f,
300
45.9M
          0.0,
301
45.9M
          0.0,
302
45.9M
          0.49215859013738733f,
303
45.9M
          -0.38525013709251915f,
304
45.9M
          0.21105601049335806f,
305
45.9M
          -0.49215859013738905f,
306
45.9M
          0.0,
307
45.9M
          0.17419412659916217f,
308
45.9M
          -0.18567180916109904f,
309
45.9M
          0.3852501370925211f,
310
45.9M
          -0.1741941265991621f,
311
45.9M
          0.0,
312
45.9M
      },
313
45.9M
      {
314
45.9M
          0.0,
315
45.9M
          0.0,
316
45.9M
          -0.14266084808807264f,
317
45.9M
          -0.3416446842253372f,
318
45.9M
          0.0,
319
45.9M
          0.7367497537172237f,
320
45.9M
          0.24627107722075148f,
321
45.9M
          -0.08574019035519306f,
322
45.9M
          -0.14266084808807344f,
323
45.9M
          0.24627107722075137f,
324
45.9M
          0.14883399227113567f,
325
45.9M
          -0.04768680350229251f,
326
45.9M
          -0.3416446842253373f,
327
45.9M
          -0.08574019035519267f,
328
45.9M
          -0.047686803502292804f,
329
45.9M
          -0.14266084808807242f,
330
45.9M
      },
331
45.9M
      {
332
45.9M
          0.0,
333
45.9M
          0.0,
334
45.9M
          -0.13813540350758585f,
335
45.9M
          0.3302282550303788f,
336
45.9M
          0.0,
337
45.9M
          0.08755115000587084f,
338
45.9M
          -0.07946706605909573f,
339
45.9M
          -0.4613374887461511f,
340
45.9M
          -0.13813540350758294f,
341
45.9M
          -0.07946706605910261f,
342
45.9M
          0.49724647109535086f,
343
45.9M
          0.12538059448563663f,
344
45.9M
          0.3302282550303805f,
345
45.9M
          -0.4613374887461554f,
346
45.9M
          0.12538059448564315f,
347
45.9M
          -0.13813540350758452f,
348
45.9M
      },
349
45.9M
      {
350
45.9M
          0.0,
351
45.9M
          0.0,
352
45.9M
          -0.17437602599651067f,
353
45.9M
          0.0702790691196284f,
354
45.9M
          0.0,
355
45.9M
          -0.2921026642334881f,
356
45.9M
          0.3623817333531167f,
357
45.9M
          0.0,
358
45.9M
          -0.1743760259965108f,
359
45.9M
          0.36238173335311646f,
360
45.9M
          0.29210266423348785f,
361
45.9M
          -0.4326608024727445f,
362
45.9M
          0.07027906911962818f,
363
45.9M
          0.0,
364
45.9M
          -0.4326608024727457f,
365
45.9M
          0.34875205199302267f,
366
45.9M
      },
367
45.9M
      {
368
45.9M
          0.0,
369
45.9M
          0.0,
370
45.9M
          0.11354987314994337f,
371
45.9M
          -0.07417504595810355f,
372
45.9M
          0.0,
373
45.9M
          0.19402893032594343f,
374
45.9M
          -0.435190496523228f,
375
45.9M
          0.21918684838857466f,
376
45.9M
          0.11354987314994257f,
377
45.9M
          -0.4351904965232251f,
378
45.9M
          0.5550443808910661f,
379
45.9M
          -0.25468277124066463f,
380
45.9M
          -0.07417504595810233f,
381
45.9M
          0.2191868483885728f,
382
45.9M
          -0.25468277124066413f,
383
45.9M
          0.1135498731499429f,
384
45.9M
      },
385
45.9M
  };
386
387
45.9M
  const HWY_CAPPED(float, 16) d;
388
137M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
91.9M
    auto pixel = Zero(d);
390
1.56G
    for (size_t j = 0; j < 16; j++) {
391
1.47G
      auto cf = Set(d, coeffs[j]);
392
1.47G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.47G
      pixel = MulAdd(cf, basis, pixel);
394
1.47G
    }
395
91.9M
    Store(pixel, d, pixels + i);
396
91.9M
  }
397
45.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
842k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
842k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
842k
      {
98
842k
          0.25,
99
842k
          0.25,
100
842k
          0.25,
101
842k
          0.25,
102
842k
          0.25,
103
842k
          0.25,
104
842k
          0.25,
105
842k
          0.25,
106
842k
          0.25,
107
842k
          0.25,
108
842k
          0.25,
109
842k
          0.25,
110
842k
          0.25,
111
842k
          0.25,
112
842k
          0.25,
113
842k
          0.25,
114
842k
      },
115
842k
      {
116
842k
          0.876902929799142f,
117
842k
          0.2206518106944235f,
118
842k
          -0.10140050393753763f,
119
842k
          -0.1014005039375375f,
120
842k
          0.2206518106944236f,
121
842k
          -0.10140050393753777f,
122
842k
          -0.10140050393753772f,
123
842k
          -0.10140050393753763f,
124
842k
          -0.10140050393753758f,
125
842k
          -0.10140050393753769f,
126
842k
          -0.1014005039375375f,
127
842k
          -0.10140050393753768f,
128
842k
          -0.10140050393753768f,
129
842k
          -0.10140050393753759f,
130
842k
          -0.10140050393753763f,
131
842k
          -0.10140050393753741f,
132
842k
      },
133
842k
      {
134
842k
          0.0,
135
842k
          0.0,
136
842k
          0.40670075830260755f,
137
842k
          0.44444816619734445f,
138
842k
          0.0,
139
842k
          0.0,
140
842k
          0.19574399372042936f,
141
842k
          0.2929100136981264f,
142
842k
          -0.40670075830260716f,
143
842k
          -0.19574399372042872f,
144
842k
          0.0,
145
842k
          0.11379074460448091f,
146
842k
          -0.44444816619734384f,
147
842k
          -0.29291001369812636f,
148
842k
          -0.1137907446044814f,
149
842k
          0.0,
150
842k
      },
151
842k
      {
152
842k
          0.0,
153
842k
          0.0,
154
842k
          -0.21255748058288748f,
155
842k
          0.3085497062849767f,
156
842k
          0.0,
157
842k
          0.4706702258572536f,
158
842k
          -0.1621205195722993f,
159
842k
          0.0,
160
842k
          -0.21255748058287047f,
161
842k
          -0.16212051957228327f,
162
842k
          -0.47067022585725277f,
163
842k
          -0.1464291867126764f,
164
842k
          0.3085497062849487f,
165
842k
          0.0,
166
842k
          -0.14642918671266536f,
167
842k
          0.4251149611657548f,
168
842k
      },
169
842k
      {
170
842k
          0.0,
171
842k
          -0.7071067811865474f,
172
842k
          0.0,
173
842k
          0.0,
174
842k
          0.7071067811865476f,
175
842k
          0.0,
176
842k
          0.0,
177
842k
          0.0,
178
842k
          0.0,
179
842k
          0.0,
180
842k
          0.0,
181
842k
          0.0,
182
842k
          0.0,
183
842k
          0.0,
184
842k
          0.0,
185
842k
          0.0,
186
842k
      },
187
842k
      {
188
842k
          -0.4105377591765233f,
189
842k
          0.6235485373547691f,
190
842k
          -0.06435071657946274f,
191
842k
          -0.06435071657946266f,
192
842k
          0.6235485373547694f,
193
842k
          -0.06435071657946284f,
194
842k
          -0.0643507165794628f,
195
842k
          -0.06435071657946274f,
196
842k
          -0.06435071657946272f,
197
842k
          -0.06435071657946279f,
198
842k
          -0.06435071657946266f,
199
842k
          -0.06435071657946277f,
200
842k
          -0.06435071657946277f,
201
842k
          -0.06435071657946273f,
202
842k
          -0.06435071657946274f,
203
842k
          -0.0643507165794626f,
204
842k
      },
205
842k
      {
206
842k
          0.0,
207
842k
          0.0,
208
842k
          -0.4517556589999482f,
209
842k
          0.15854503551840063f,
210
842k
          0.0,
211
842k
          -0.04038515160822202f,
212
842k
          0.0074182263792423875f,
213
842k
          0.39351034269210167f,
214
842k
          -0.45175565899994635f,
215
842k
          0.007418226379244351f,
216
842k
          0.1107416575309343f,
217
842k
          0.08298163094882051f,
218
842k
          0.15854503551839705f,
219
842k
          0.3935103426921022f,
220
842k
          0.0829816309488214f,
221
842k
          -0.45175565899994796f,
222
842k
      },
223
842k
      {
224
842k
          0.0,
225
842k
          0.0,
226
842k
          -0.304684750724869f,
227
842k
          0.5112616136591823f,
228
842k
          0.0,
229
842k
          0.0,
230
842k
          -0.290480129728998f,
231
842k
          -0.06578701549142804f,
232
842k
          0.304684750724884f,
233
842k
          0.2904801297290076f,
234
842k
          0.0,
235
842k
          -0.23889773523344604f,
236
842k
          -0.5112616136592012f,
237
842k
          0.06578701549142545f,
238
842k
          0.23889773523345467f,
239
842k
          0.0,
240
842k
      },
241
842k
      {
242
842k
          0.0,
243
842k
          0.0,
244
842k
          0.3017929516615495f,
245
842k
          0.25792362796341184f,
246
842k
          0.0,
247
842k
          0.16272340142866204f,
248
842k
          0.09520022653475037f,
249
842k
          0.0,
250
842k
          0.3017929516615503f,
251
842k
          0.09520022653475055f,
252
842k
          -0.16272340142866173f,
253
842k
          -0.35312385449816297f,
254
842k
          0.25792362796341295f,
255
842k
          0.0,
256
842k
          -0.3531238544981624f,
257
842k
          -0.6035859033230976f,
258
842k
      },
259
842k
      {
260
842k
          0.0,
261
842k
          0.0,
262
842k
          0.40824829046386274f,
263
842k
          0.0,
264
842k
          0.0,
265
842k
          0.0,
266
842k
          0.0,
267
842k
          -0.4082482904638628f,
268
842k
          -0.4082482904638635f,
269
842k
          0.0,
270
842k
          0.0,
271
842k
          -0.40824829046386296f,
272
842k
          0.0,
273
842k
          0.4082482904638634f,
274
842k
          0.408248290463863f,
275
842k
          0.0,
276
842k
      },
277
842k
      {
278
842k
          0.0,
279
842k
          0.0,
280
842k
          0.1747866975480809f,
281
842k
          0.0812611176717539f,
282
842k
          0.0,
283
842k
          0.0,
284
842k
          -0.3675398009862027f,
285
842k
          -0.307882213957909f,
286
842k
          -0.17478669754808135f,
287
842k
          0.3675398009862011f,
288
842k
          0.0,
289
842k
          0.4826689115059883f,
290
842k
          -0.08126111767175039f,
291
842k
          0.30788221395790305f,
292
842k
          -0.48266891150598584f,
293
842k
          0.0,
294
842k
      },
295
842k
      {
296
842k
          0.0,
297
842k
          0.0,
298
842k
          -0.21105601049335784f,
299
842k
          0.18567180916109802f,
300
842k
          0.0,
301
842k
          0.0,
302
842k
          0.49215859013738733f,
303
842k
          -0.38525013709251915f,
304
842k
          0.21105601049335806f,
305
842k
          -0.49215859013738905f,
306
842k
          0.0,
307
842k
          0.17419412659916217f,
308
842k
          -0.18567180916109904f,
309
842k
          0.3852501370925211f,
310
842k
          -0.1741941265991621f,
311
842k
          0.0,
312
842k
      },
313
842k
      {
314
842k
          0.0,
315
842k
          0.0,
316
842k
          -0.14266084808807264f,
317
842k
          -0.3416446842253372f,
318
842k
          0.0,
319
842k
          0.7367497537172237f,
320
842k
          0.24627107722075148f,
321
842k
          -0.08574019035519306f,
322
842k
          -0.14266084808807344f,
323
842k
          0.24627107722075137f,
324
842k
          0.14883399227113567f,
325
842k
          -0.04768680350229251f,
326
842k
          -0.3416446842253373f,
327
842k
          -0.08574019035519267f,
328
842k
          -0.047686803502292804f,
329
842k
          -0.14266084808807242f,
330
842k
      },
331
842k
      {
332
842k
          0.0,
333
842k
          0.0,
334
842k
          -0.13813540350758585f,
335
842k
          0.3302282550303788f,
336
842k
          0.0,
337
842k
          0.08755115000587084f,
338
842k
          -0.07946706605909573f,
339
842k
          -0.4613374887461511f,
340
842k
          -0.13813540350758294f,
341
842k
          -0.07946706605910261f,
342
842k
          0.49724647109535086f,
343
842k
          0.12538059448563663f,
344
842k
          0.3302282550303805f,
345
842k
          -0.4613374887461554f,
346
842k
          0.12538059448564315f,
347
842k
          -0.13813540350758452f,
348
842k
      },
349
842k
      {
350
842k
          0.0,
351
842k
          0.0,
352
842k
          -0.17437602599651067f,
353
842k
          0.0702790691196284f,
354
842k
          0.0,
355
842k
          -0.2921026642334881f,
356
842k
          0.3623817333531167f,
357
842k
          0.0,
358
842k
          -0.1743760259965108f,
359
842k
          0.36238173335311646f,
360
842k
          0.29210266423348785f,
361
842k
          -0.4326608024727445f,
362
842k
          0.07027906911962818f,
363
842k
          0.0,
364
842k
          -0.4326608024727457f,
365
842k
          0.34875205199302267f,
366
842k
      },
367
842k
      {
368
842k
          0.0,
369
842k
          0.0,
370
842k
          0.11354987314994337f,
371
842k
          -0.07417504595810355f,
372
842k
          0.0,
373
842k
          0.19402893032594343f,
374
842k
          -0.435190496523228f,
375
842k
          0.21918684838857466f,
376
842k
          0.11354987314994257f,
377
842k
          -0.4351904965232251f,
378
842k
          0.5550443808910661f,
379
842k
          -0.25468277124066463f,
380
842k
          -0.07417504595810233f,
381
842k
          0.2191868483885728f,
382
842k
          -0.25468277124066413f,
383
842k
          0.1135498731499429f,
384
842k
      },
385
842k
  };
386
387
842k
  const HWY_CAPPED(float, 16) d;
388
2.52M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.68M
    auto pixel = Zero(d);
390
28.6M
    for (size_t j = 0; j < 16; j++) {
391
26.9M
      auto cf = Set(d, coeffs[j]);
392
26.9M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
26.9M
      pixel = MulAdd(cf, basis, pixel);
394
26.9M
    }
395
1.68M
    Store(pixel, d, pixels + i);
396
1.68M
  }
397
842k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
46.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
46.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
46.8M
  size_t afv_x = afv_kind & 1;
404
46.8M
  size_t afv_y = afv_kind / 2;
405
46.8M
  float dcs[3] = {};
406
46.8M
  float block00 = coefficients[0];
407
46.8M
  float block01 = coefficients[1];
408
46.8M
  float block10 = coefficients[8];
409
46.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
46.8M
  dcs[1] = (block00 + block10 - block01);
411
46.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
46.8M
  HWY_ALIGN float coeff[4 * 4];
414
46.8M
  coeff[0] = dcs[0];
415
234M
  for (size_t iy = 0; iy < 4; iy++) {
416
936M
    for (size_t ix = 0; ix < 4; ix++) {
417
749M
      if (ix == 0 && iy == 0) continue;
418
702M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
702M
    }
420
187M
  }
421
46.8M
  HWY_ALIGN float block[4 * 8];
422
46.8M
  AFVIDCT4x4(coeff, block);
423
234M
  for (size_t iy = 0; iy < 4; iy++) {
424
936M
    for (size_t ix = 0; ix < 4; ix++) {
425
749M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
749M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
749M
    }
428
187M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
46.8M
  block[0] = dcs[1];
431
234M
  for (size_t iy = 0; iy < 4; iy++) {
432
936M
    for (size_t ix = 0; ix < 4; ix++) {
433
749M
      if (ix == 0 && iy == 0) continue;
434
702M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
702M
    }
436
187M
  }
437
46.8M
  ComputeScaledIDCT<4, 4>()(
438
46.8M
      block,
439
46.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
46.8M
            pixels_stride),
441
46.8M
      scratch_space);
442
  // IDCT4x8.
443
46.8M
  block[0] = dcs[2];
444
234M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.68G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.49G
      if (ix == 0 && iy == 0) continue;
447
1.45G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.45G
    }
449
187M
  }
450
46.8M
  ComputeScaledIDCT<4, 8>()(
451
46.8M
      block,
452
46.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
46.8M
      scratch_space);
454
46.8M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.4M
  size_t afv_x = afv_kind & 1;
404
11.4M
  size_t afv_y = afv_kind / 2;
405
11.4M
  float dcs[3] = {};
406
11.4M
  float block00 = coefficients[0];
407
11.4M
  float block01 = coefficients[1];
408
11.4M
  float block10 = coefficients[8];
409
11.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.4M
  dcs[1] = (block00 + block10 - block01);
411
11.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.4M
  HWY_ALIGN float coeff[4 * 4];
414
11.4M
  coeff[0] = dcs[0];
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      if (ix == 0 && iy == 0) continue;
418
172M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
172M
    }
420
45.9M
  }
421
11.4M
  HWY_ALIGN float block[4 * 8];
422
11.4M
  AFVIDCT4x4(coeff, block);
423
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
229M
    for (size_t ix = 0; ix < 4; ix++) {
425
183M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
183M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
183M
    }
428
45.9M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.4M
  block[0] = dcs[1];
431
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
229M
    for (size_t ix = 0; ix < 4; ix++) {
433
183M
      if (ix == 0 && iy == 0) continue;
434
172M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
172M
    }
436
45.9M
  }
437
11.4M
  ComputeScaledIDCT<4, 4>()(
438
11.4M
      block,
439
11.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.4M
            pixels_stride),
441
11.4M
      scratch_space);
442
  // IDCT4x8.
443
11.4M
  block[0] = dcs[2];
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      if (ix == 0 && iy == 0) continue;
447
356M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
356M
    }
449
45.9M
  }
450
11.4M
  ComputeScaledIDCT<4, 8>()(
451
11.4M
      block,
452
11.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.4M
      scratch_space);
454
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.4M
  size_t afv_x = afv_kind & 1;
404
11.4M
  size_t afv_y = afv_kind / 2;
405
11.4M
  float dcs[3] = {};
406
11.4M
  float block00 = coefficients[0];
407
11.4M
  float block01 = coefficients[1];
408
11.4M
  float block10 = coefficients[8];
409
11.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.4M
  dcs[1] = (block00 + block10 - block01);
411
11.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.4M
  HWY_ALIGN float coeff[4 * 4];
414
11.4M
  coeff[0] = dcs[0];
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      if (ix == 0 && iy == 0) continue;
418
172M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
172M
    }
420
45.9M
  }
421
11.4M
  HWY_ALIGN float block[4 * 8];
422
11.4M
  AFVIDCT4x4(coeff, block);
423
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
229M
    for (size_t ix = 0; ix < 4; ix++) {
425
183M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
183M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
183M
    }
428
45.9M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.4M
  block[0] = dcs[1];
431
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
229M
    for (size_t ix = 0; ix < 4; ix++) {
433
183M
      if (ix == 0 && iy == 0) continue;
434
172M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
172M
    }
436
45.9M
  }
437
11.4M
  ComputeScaledIDCT<4, 4>()(
438
11.4M
      block,
439
11.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.4M
            pixels_stride),
441
11.4M
      scratch_space);
442
  // IDCT4x8.
443
11.4M
  block[0] = dcs[2];
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      if (ix == 0 && iy == 0) continue;
447
356M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
356M
    }
449
45.9M
  }
450
11.4M
  ComputeScaledIDCT<4, 8>()(
451
11.4M
      block,
452
11.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.4M
      scratch_space);
454
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.4M
  size_t afv_x = afv_kind & 1;
404
11.4M
  size_t afv_y = afv_kind / 2;
405
11.4M
  float dcs[3] = {};
406
11.4M
  float block00 = coefficients[0];
407
11.4M
  float block01 = coefficients[1];
408
11.4M
  float block10 = coefficients[8];
409
11.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.4M
  dcs[1] = (block00 + block10 - block01);
411
11.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.4M
  HWY_ALIGN float coeff[4 * 4];
414
11.4M
  coeff[0] = dcs[0];
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      if (ix == 0 && iy == 0) continue;
418
172M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
172M
    }
420
45.9M
  }
421
11.4M
  HWY_ALIGN float block[4 * 8];
422
11.4M
  AFVIDCT4x4(coeff, block);
423
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
229M
    for (size_t ix = 0; ix < 4; ix++) {
425
183M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
183M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
183M
    }
428
45.9M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.4M
  block[0] = dcs[1];
431
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
229M
    for (size_t ix = 0; ix < 4; ix++) {
433
183M
      if (ix == 0 && iy == 0) continue;
434
172M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
172M
    }
436
45.9M
  }
437
11.4M
  ComputeScaledIDCT<4, 4>()(
438
11.4M
      block,
439
11.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.4M
            pixels_stride),
441
11.4M
      scratch_space);
442
  // IDCT4x8.
443
11.4M
  block[0] = dcs[2];
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      if (ix == 0 && iy == 0) continue;
447
356M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
356M
    }
449
45.9M
  }
450
11.4M
  ComputeScaledIDCT<4, 8>()(
451
11.4M
      block,
452
11.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.4M
      scratch_space);
454
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
11.4M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
11.4M
  size_t afv_x = afv_kind & 1;
404
11.4M
  size_t afv_y = afv_kind / 2;
405
11.4M
  float dcs[3] = {};
406
11.4M
  float block00 = coefficients[0];
407
11.4M
  float block01 = coefficients[1];
408
11.4M
  float block10 = coefficients[8];
409
11.4M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
11.4M
  dcs[1] = (block00 + block10 - block01);
411
11.4M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
11.4M
  HWY_ALIGN float coeff[4 * 4];
414
11.4M
  coeff[0] = dcs[0];
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      if (ix == 0 && iy == 0) continue;
418
172M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
172M
    }
420
45.9M
  }
421
11.4M
  HWY_ALIGN float block[4 * 8];
422
11.4M
  AFVIDCT4x4(coeff, block);
423
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
424
229M
    for (size_t ix = 0; ix < 4; ix++) {
425
183M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
183M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
183M
    }
428
45.9M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
11.4M
  block[0] = dcs[1];
431
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
432
229M
    for (size_t ix = 0; ix < 4; ix++) {
433
183M
      if (ix == 0 && iy == 0) continue;
434
172M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
172M
    }
436
45.9M
  }
437
11.4M
  ComputeScaledIDCT<4, 4>()(
438
11.4M
      block,
439
11.4M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
11.4M
            pixels_stride),
441
11.4M
      scratch_space);
442
  // IDCT4x8.
443
11.4M
  block[0] = dcs[2];
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      if (ix == 0 && iy == 0) continue;
447
356M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
356M
    }
449
45.9M
  }
450
11.4M
  ComputeScaledIDCT<4, 8>()(
451
11.4M
      block,
452
11.4M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
11.4M
      scratch_space);
454
11.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
288k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
288k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
288k
  size_t afv_x = afv_kind & 1;
404
288k
  size_t afv_y = afv_kind / 2;
405
288k
  float dcs[3] = {};
406
288k
  float block00 = coefficients[0];
407
288k
  float block01 = coefficients[1];
408
288k
  float block10 = coefficients[8];
409
288k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
288k
  dcs[1] = (block00 + block10 - block01);
411
288k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
288k
  HWY_ALIGN float coeff[4 * 4];
414
288k
  coeff[0] = dcs[0];
415
1.44M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.77M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.61M
      if (ix == 0 && iy == 0) continue;
418
4.33M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.33M
    }
420
1.15M
  }
421
288k
  HWY_ALIGN float block[4 * 8];
422
288k
  AFVIDCT4x4(coeff, block);
423
1.44M
  for (size_t iy = 0; iy < 4; iy++) {
424
5.77M
    for (size_t ix = 0; ix < 4; ix++) {
425
4.61M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
4.61M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
4.61M
    }
428
1.15M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
288k
  block[0] = dcs[1];
431
1.44M
  for (size_t iy = 0; iy < 4; iy++) {
432
5.77M
    for (size_t ix = 0; ix < 4; ix++) {
433
4.61M
      if (ix == 0 && iy == 0) continue;
434
4.33M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.33M
    }
436
1.15M
  }
437
288k
  ComputeScaledIDCT<4, 4>()(
438
288k
      block,
439
288k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
288k
            pixels_stride),
441
288k
      scratch_space);
442
  // IDCT4x8.
443
288k
  block[0] = dcs[2];
444
1.44M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.3M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.23M
      if (ix == 0 && iy == 0) continue;
447
8.95M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
8.95M
    }
449
1.15M
  }
450
288k
  ComputeScaledIDCT<4, 8>()(
451
288k
      block,
452
288k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
288k
      scratch_space);
454
288k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
154k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
154k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
154k
  size_t afv_x = afv_kind & 1;
404
154k
  size_t afv_y = afv_kind / 2;
405
154k
  float dcs[3] = {};
406
154k
  float block00 = coefficients[0];
407
154k
  float block01 = coefficients[1];
408
154k
  float block10 = coefficients[8];
409
154k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
154k
  dcs[1] = (block00 + block10 - block01);
411
154k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
154k
  HWY_ALIGN float coeff[4 * 4];
414
154k
  coeff[0] = dcs[0];
415
774k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.09M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.47M
      if (ix == 0 && iy == 0) continue;
418
2.32M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.32M
    }
420
619k
  }
421
154k
  HWY_ALIGN float block[4 * 8];
422
154k
  AFVIDCT4x4(coeff, block);
423
774k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.09M
    for (size_t ix = 0; ix < 4; ix++) {
425
2.47M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
2.47M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
2.47M
    }
428
619k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
154k
  block[0] = dcs[1];
431
774k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.09M
    for (size_t ix = 0; ix < 4; ix++) {
433
2.47M
      if (ix == 0 && iy == 0) continue;
434
2.32M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.32M
    }
436
619k
  }
437
154k
  ComputeScaledIDCT<4, 4>()(
438
154k
      block,
439
154k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
154k
            pixels_stride),
441
154k
      scratch_space);
442
  // IDCT4x8.
443
154k
  block[0] = dcs[2];
444
774k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.57M
    for (size_t ix = 0; ix < 8; ix++) {
446
4.95M
      if (ix == 0 && iy == 0) continue;
447
4.79M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
4.79M
    }
449
619k
  }
450
154k
  ComputeScaledIDCT<4, 8>()(
451
154k
      block,
452
154k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
154k
      scratch_space);
454
154k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
196k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
196k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
196k
  size_t afv_x = afv_kind & 1;
404
196k
  size_t afv_y = afv_kind / 2;
405
196k
  float dcs[3] = {};
406
196k
  float block00 = coefficients[0];
407
196k
  float block01 = coefficients[1];
408
196k
  float block10 = coefficients[8];
409
196k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
196k
  dcs[1] = (block00 + block10 - block01);
411
196k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
196k
  HWY_ALIGN float coeff[4 * 4];
414
196k
  coeff[0] = dcs[0];
415
984k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.93M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.15M
      if (ix == 0 && iy == 0) continue;
418
2.95M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.95M
    }
420
787k
  }
421
196k
  HWY_ALIGN float block[4 * 8];
422
196k
  AFVIDCT4x4(coeff, block);
423
984k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.93M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.15M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.15M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.15M
    }
428
787k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
196k
  block[0] = dcs[1];
431
984k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.93M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.15M
      if (ix == 0 && iy == 0) continue;
434
2.95M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.95M
    }
436
787k
  }
437
196k
  ComputeScaledIDCT<4, 4>()(
438
196k
      block,
439
196k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
196k
            pixels_stride),
441
196k
      scratch_space);
442
  // IDCT4x8.
443
196k
  block[0] = dcs[2];
444
984k
  for (size_t iy = 0; iy < 4; iy++) {
445
7.09M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.30M
      if (ix == 0 && iy == 0) continue;
447
6.10M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.10M
    }
449
787k
  }
450
196k
  ComputeScaledIDCT<4, 8>()(
451
196k
      block,
452
196k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
196k
      scratch_space);
454
196k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
202k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
202k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
202k
  size_t afv_x = afv_kind & 1;
404
202k
  size_t afv_y = afv_kind / 2;
405
202k
  float dcs[3] = {};
406
202k
  float block00 = coefficients[0];
407
202k
  float block01 = coefficients[1];
408
202k
  float block10 = coefficients[8];
409
202k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
202k
  dcs[1] = (block00 + block10 - block01);
411
202k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
202k
  HWY_ALIGN float coeff[4 * 4];
414
202k
  coeff[0] = dcs[0];
415
1.01M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.04M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.23M
      if (ix == 0 && iy == 0) continue;
418
3.03M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.03M
    }
420
809k
  }
421
202k
  HWY_ALIGN float block[4 * 8];
422
202k
  AFVIDCT4x4(coeff, block);
423
1.01M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.04M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.23M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.23M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.23M
    }
428
809k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
202k
  block[0] = dcs[1];
431
1.01M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.04M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.23M
      if (ix == 0 && iy == 0) continue;
434
3.03M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.03M
    }
436
809k
  }
437
202k
  ComputeScaledIDCT<4, 4>()(
438
202k
      block,
439
202k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
202k
            pixels_stride),
441
202k
      scratch_space);
442
  // IDCT4x8.
443
202k
  block[0] = dcs[2];
444
1.01M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.28M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.47M
      if (ix == 0 && iy == 0) continue;
447
6.27M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.27M
    }
449
809k
  }
450
202k
  ComputeScaledIDCT<4, 8>()(
451
202k
      block,
452
202k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
202k
      scratch_space);
454
202k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
160M
                                        float* scratch_space) {
461
160M
  using Type = AcStrategyType;
462
160M
  switch (strategy) {
463
14.5M
    case Type::IDENTITY: {
464
14.5M
      float dcs[4] = {};
465
14.5M
      float block00 = coefficients[0];
466
14.5M
      float block01 = coefficients[1];
467
14.5M
      float block10 = coefficients[8];
468
14.5M
      float block11 = coefficients[9];
469
14.5M
      dcs[0] = block00 + block01 + block10 + block11;
470
14.5M
      dcs[1] = block00 + block01 - block10 - block11;
471
14.5M
      dcs[2] = block00 - block01 + block10 - block11;
472
14.5M
      dcs[3] = block00 - block01 - block10 + block11;
473
43.5M
      for (size_t y = 0; y < 2; y++) {
474
87.0M
        for (size_t x = 0; x < 2; x++) {
475
58.0M
          float block_dc = dcs[y * 2 + x];
476
58.0M
          float residual_sum = 0;
477
290M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
479
928M
              if (ix == 0 && iy == 0) continue;
480
870M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
870M
            }
482
232M
          }
483
58.0M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
58.0M
              block_dc - residual_sum * (1.0f / 16);
485
290M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
487
928M
              if (ix == 1 && iy == 1) continue;
488
870M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
870M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
870M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
870M
            }
492
232M
          }
493
58.0M
          pixels[y * 4 * pixels_stride + x * 4] =
494
58.0M
              coefficients[(y + 2) * 8 + x + 2] +
495
58.0M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
58.0M
        }
497
29.0M
      }
498
14.5M
      break;
499
0
    }
500
11.9M
    case Type::DCT8X4: {
501
11.9M
      float dcs[2] = {};
502
11.9M
      float block0 = coefficients[0];
503
11.9M
      float block1 = coefficients[8];
504
11.9M
      dcs[0] = block0 + block1;
505
11.9M
      dcs[1] = block0 - block1;
506
35.7M
      for (size_t x = 0; x < 2; x++) {
507
23.8M
        HWY_ALIGN float block[4 * 8];
508
23.8M
        block[0] = dcs[x];
509
119M
        for (size_t iy = 0; iy < 4; iy++) {
510
856M
          for (size_t ix = 0; ix < 8; ix++) {
511
761M
            if (ix == 0 && iy == 0) continue;
512
737M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
737M
          }
514
95.2M
        }
515
23.8M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
23.8M
                                  scratch_space);
517
23.8M
      }
518
11.9M
      break;
519
0
    }
520
11.6M
    case Type::DCT4X8: {
521
11.6M
      float dcs[2] = {};
522
11.6M
      float block0 = coefficients[0];
523
11.6M
      float block1 = coefficients[8];
524
11.6M
      dcs[0] = block0 + block1;
525
11.6M
      dcs[1] = block0 - block1;
526
35.0M
      for (size_t y = 0; y < 2; y++) {
527
23.3M
        HWY_ALIGN float block[4 * 8];
528
23.3M
        block[0] = dcs[y];
529
116M
        for (size_t iy = 0; iy < 4; iy++) {
530
841M
          for (size_t ix = 0; ix < 8; ix++) {
531
748M
            if (ix == 0 && iy == 0) continue;
532
724M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
724M
          }
534
93.5M
        }
535
23.3M
        ComputeScaledIDCT<4, 8>()(
536
23.3M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
23.3M
            scratch_space);
538
23.3M
      }
539
11.6M
      break;
540
0
    }
541
11.5M
    case Type::DCT4X4: {
542
11.5M
      float dcs[4] = {};
543
11.5M
      float block00 = coefficients[0];
544
11.5M
      float block01 = coefficients[1];
545
11.5M
      float block10 = coefficients[8];
546
11.5M
      float block11 = coefficients[9];
547
11.5M
      dcs[0] = block00 + block01 + block10 + block11;
548
11.5M
      dcs[1] = block00 + block01 - block10 - block11;
549
11.5M
      dcs[2] = block00 - block01 + block10 - block11;
550
11.5M
      dcs[3] = block00 - block01 - block10 + block11;
551
34.5M
      for (size_t y = 0; y < 2; y++) {
552
69.0M
        for (size_t x = 0; x < 2; x++) {
553
46.0M
          HWY_ALIGN float block[4 * 4];
554
46.0M
          block[0] = dcs[y * 2 + x];
555
230M
          for (size_t iy = 0; iy < 4; iy++) {
556
920M
            for (size_t ix = 0; ix < 4; ix++) {
557
736M
              if (ix == 0 && iy == 0) continue;
558
690M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
690M
            }
560
184M
          }
561
46.0M
          ComputeScaledIDCT<4, 4>()(
562
46.0M
              block,
563
46.0M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
46.0M
              scratch_space);
565
46.0M
        }
566
23.0M
      }
567
11.5M
      break;
568
0
    }
569
17.0M
    case Type::DCT2X2: {
570
17.0M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
17.0M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
17.0M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
17.0M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
17.0M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
153M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.22G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.09G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.09G
        }
579
136M
      }
580
17.0M
      break;
581
0
    }
582
5.08M
    case Type::DCT16X16: {
583
5.08M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.08M
                                  scratch_space);
585
5.08M
      break;
586
0
    }
587
9.79M
    case Type::DCT16X8: {
588
9.79M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.79M
                                 scratch_space);
590
9.79M
      break;
591
0
    }
592
9.82M
    case Type::DCT8X16: {
593
9.82M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
9.82M
                                 scratch_space);
595
9.82M
      break;
596
0
    }
597
708
    case Type::DCT32X8: {
598
708
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
708
                                 scratch_space);
600
708
      break;
601
0
    }
602
117
    case Type::DCT8X32: {
603
117
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
117
                                 scratch_space);
605
117
      break;
606
0
    }
607
1.95M
    case Type::DCT32X16: {
608
1.95M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.95M
                                  scratch_space);
610
1.95M
      break;
611
0
    }
612
1.92M
    case Type::DCT16X32: {
613
1.92M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.92M
                                  scratch_space);
615
1.92M
      break;
616
0
    }
617
1.15M
    case Type::DCT32X32: {
618
1.15M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.15M
                                  scratch_space);
620
1.15M
      break;
621
0
    }
622
15.9M
    case Type::DCT: {
623
15.9M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
15.9M
                                scratch_space);
625
15.9M
      break;
626
0
    }
627
11.7M
    case Type::AFV0: {
628
11.7M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
11.7M
      break;
630
0
    }
631
11.6M
    case Type::AFV1: {
632
11.6M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
11.6M
      break;
634
0
    }
635
11.6M
    case Type::AFV2: {
636
11.6M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
11.6M
      break;
638
0
    }
639
11.7M
    case Type::AFV3: {
640
11.7M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
11.7M
      break;
642
0
    }
643
585k
    case Type::DCT64X32: {
644
585k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
585k
                                  scratch_space);
646
585k
      break;
647
0
    }
648
345k
    case Type::DCT32X64: {
649
345k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
345k
                                  scratch_space);
651
345k
      break;
652
0
    }
653
307k
    case Type::DCT64X64: {
654
307k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
307k
                                  scratch_space);
656
307k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
6
    case Type::DCT64X128: {
664
6
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
6
                                   scratch_space);
666
6
      break;
667
0
    }
668
18
    case Type::DCT128X128: {
669
18
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
18
                                    scratch_space);
671
18
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
160M
  }
689
160M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
143M
                                        float* scratch_space) {
461
143M
  using Type = AcStrategyType;
462
143M
  switch (strategy) {
463
11.4M
    case Type::IDENTITY: {
464
11.4M
      float dcs[4] = {};
465
11.4M
      float block00 = coefficients[0];
466
11.4M
      float block01 = coefficients[1];
467
11.4M
      float block10 = coefficients[8];
468
11.4M
      float block11 = coefficients[9];
469
11.4M
      dcs[0] = block00 + block01 + block10 + block11;
470
11.4M
      dcs[1] = block00 + block01 - block10 - block11;
471
11.4M
      dcs[2] = block00 - block01 + block10 - block11;
472
11.4M
      dcs[3] = block00 - block01 - block10 + block11;
473
34.4M
      for (size_t y = 0; y < 2; y++) {
474
68.9M
        for (size_t x = 0; x < 2; x++) {
475
45.9M
          float block_dc = dcs[y * 2 + x];
476
45.9M
          float residual_sum = 0;
477
229M
          for (size_t iy = 0; iy < 4; iy++) {
478
919M
            for (size_t ix = 0; ix < 4; ix++) {
479
735M
              if (ix == 0 && iy == 0) continue;
480
689M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
689M
            }
482
183M
          }
483
45.9M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
45.9M
              block_dc - residual_sum * (1.0f / 16);
485
229M
          for (size_t iy = 0; iy < 4; iy++) {
486
919M
            for (size_t ix = 0; ix < 4; ix++) {
487
735M
              if (ix == 1 && iy == 1) continue;
488
689M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
689M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
689M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
689M
            }
492
183M
          }
493
45.9M
          pixels[y * 4 * pixels_stride + x * 4] =
494
45.9M
              coefficients[(y + 2) * 8 + x + 2] +
495
45.9M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
45.9M
        }
497
22.9M
      }
498
11.4M
      break;
499
0
    }
500
11.4M
    case Type::DCT8X4: {
501
11.4M
      float dcs[2] = {};
502
11.4M
      float block0 = coefficients[0];
503
11.4M
      float block1 = coefficients[8];
504
11.4M
      dcs[0] = block0 + block1;
505
11.4M
      dcs[1] = block0 - block1;
506
34.4M
      for (size_t x = 0; x < 2; x++) {
507
22.9M
        HWY_ALIGN float block[4 * 8];
508
22.9M
        block[0] = dcs[x];
509
114M
        for (size_t iy = 0; iy < 4; iy++) {
510
827M
          for (size_t ix = 0; ix < 8; ix++) {
511
735M
            if (ix == 0 && iy == 0) continue;
512
712M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
712M
          }
514
91.9M
        }
515
22.9M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
22.9M
                                  scratch_space);
517
22.9M
      }
518
11.4M
      break;
519
0
    }
520
11.4M
    case Type::DCT4X8: {
521
11.4M
      float dcs[2] = {};
522
11.4M
      float block0 = coefficients[0];
523
11.4M
      float block1 = coefficients[8];
524
11.4M
      dcs[0] = block0 + block1;
525
11.4M
      dcs[1] = block0 - block1;
526
34.4M
      for (size_t y = 0; y < 2; y++) {
527
22.9M
        HWY_ALIGN float block[4 * 8];
528
22.9M
        block[0] = dcs[y];
529
114M
        for (size_t iy = 0; iy < 4; iy++) {
530
827M
          for (size_t ix = 0; ix < 8; ix++) {
531
735M
            if (ix == 0 && iy == 0) continue;
532
712M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
712M
          }
534
91.9M
        }
535
22.9M
        ComputeScaledIDCT<4, 8>()(
536
22.9M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
22.9M
            scratch_space);
538
22.9M
      }
539
11.4M
      break;
540
0
    }
541
11.4M
    case Type::DCT4X4: {
542
11.4M
      float dcs[4] = {};
543
11.4M
      float block00 = coefficients[0];
544
11.4M
      float block01 = coefficients[1];
545
11.4M
      float block10 = coefficients[8];
546
11.4M
      float block11 = coefficients[9];
547
11.4M
      dcs[0] = block00 + block01 + block10 + block11;
548
11.4M
      dcs[1] = block00 + block01 - block10 - block11;
549
11.4M
      dcs[2] = block00 - block01 + block10 - block11;
550
11.4M
      dcs[3] = block00 - block01 - block10 + block11;
551
34.4M
      for (size_t y = 0; y < 2; y++) {
552
68.9M
        for (size_t x = 0; x < 2; x++) {
553
45.9M
          HWY_ALIGN float block[4 * 4];
554
45.9M
          block[0] = dcs[y * 2 + x];
555
229M
          for (size_t iy = 0; iy < 4; iy++) {
556
919M
            for (size_t ix = 0; ix < 4; ix++) {
557
735M
              if (ix == 0 && iy == 0) continue;
558
689M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
689M
            }
560
183M
          }
561
45.9M
          ComputeScaledIDCT<4, 4>()(
562
45.9M
              block,
563
45.9M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
45.9M
              scratch_space);
565
45.9M
        }
566
22.9M
      }
567
11.4M
      break;
568
0
    }
569
11.4M
    case Type::DCT2X2: {
570
11.4M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
11.4M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
11.4M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
11.4M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
11.4M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
103M
      for (size_t y = 0; y < kBlockDim; y++) {
576
827M
        for (size_t x = 0; x < kBlockDim; x++) {
577
735M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
735M
        }
579
91.9M
      }
580
11.4M
      break;
581
0
    }
582
4.64M
    case Type::DCT16X16: {
583
4.64M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.64M
                                  scratch_space);
585
4.64M
      break;
586
0
    }
587
9.16M
    case Type::DCT16X8: {
588
9.16M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.16M
                                 scratch_space);
590
9.16M
      break;
591
0
    }
592
9.14M
    case Type::DCT8X16: {
593
9.14M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
9.14M
                                 scratch_space);
595
9.14M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
1.81M
    case Type::DCT32X16: {
608
1.81M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.81M
                                  scratch_space);
610
1.81M
      break;
611
0
    }
612
1.79M
    case Type::DCT16X32: {
613
1.79M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.79M
                                  scratch_space);
615
1.79M
      break;
616
0
    }
617
919k
    case Type::DCT32X32: {
618
919k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
919k
                                  scratch_space);
620
919k
      break;
621
0
    }
622
11.4M
    case Type::DCT: {
623
11.4M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
11.4M
                                scratch_space);
625
11.4M
      break;
626
0
    }
627
11.4M
    case Type::AFV0: {
628
11.4M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
11.4M
      break;
630
0
    }
631
11.4M
    case Type::AFV1: {
632
11.4M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
11.4M
      break;
634
0
    }
635
11.4M
    case Type::AFV2: {
636
11.4M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
11.4M
      break;
638
0
    }
639
11.4M
    case Type::AFV3: {
640
11.4M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
11.4M
      break;
642
0
    }
643
548k
    case Type::DCT64X32: {
644
548k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
548k
                                  scratch_space);
646
548k
      break;
647
0
    }
648
329k
    case Type::DCT32X64: {
649
329k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
329k
                                  scratch_space);
651
329k
      break;
652
0
    }
653
159k
    case Type::DCT64X64: {
654
159k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
159k
                                  scratch_space);
656
159k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
143M
  }
689
143M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
16.9M
                                        float* scratch_space) {
461
16.9M
  using Type = AcStrategyType;
462
16.9M
  switch (strategy) {
463
3.01M
    case Type::IDENTITY: {
464
3.01M
      float dcs[4] = {};
465
3.01M
      float block00 = coefficients[0];
466
3.01M
      float block01 = coefficients[1];
467
3.01M
      float block10 = coefficients[8];
468
3.01M
      float block11 = coefficients[9];
469
3.01M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.01M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.01M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.01M
      dcs[3] = block00 - block01 - block10 + block11;
473
9.05M
      for (size_t y = 0; y < 2; y++) {
474
18.1M
        for (size_t x = 0; x < 2; x++) {
475
12.0M
          float block_dc = dcs[y * 2 + x];
476
12.0M
          float residual_sum = 0;
477
60.3M
          for (size_t iy = 0; iy < 4; iy++) {
478
241M
            for (size_t ix = 0; ix < 4; ix++) {
479
193M
              if (ix == 0 && iy == 0) continue;
480
181M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
181M
            }
482
48.2M
          }
483
12.0M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
12.0M
              block_dc - residual_sum * (1.0f / 16);
485
60.3M
          for (size_t iy = 0; iy < 4; iy++) {
486
241M
            for (size_t ix = 0; ix < 4; ix++) {
487
193M
              if (ix == 1 && iy == 1) continue;
488
181M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
181M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
181M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
181M
            }
492
48.2M
          }
493
12.0M
          pixels[y * 4 * pixels_stride + x * 4] =
494
12.0M
              coefficients[(y + 2) * 8 + x + 2] +
495
12.0M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
12.0M
        }
497
6.03M
      }
498
3.01M
      break;
499
0
    }
500
403k
    case Type::DCT8X4: {
501
403k
      float dcs[2] = {};
502
403k
      float block0 = coefficients[0];
503
403k
      float block1 = coefficients[8];
504
403k
      dcs[0] = block0 + block1;
505
403k
      dcs[1] = block0 - block1;
506
1.20M
      for (size_t x = 0; x < 2; x++) {
507
806k
        HWY_ALIGN float block[4 * 8];
508
806k
        block[0] = dcs[x];
509
4.03M
        for (size_t iy = 0; iy < 4; iy++) {
510
29.0M
          for (size_t ix = 0; ix < 8; ix++) {
511
25.8M
            if (ix == 0 && iy == 0) continue;
512
24.9M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
24.9M
          }
514
3.22M
        }
515
806k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
806k
                                  scratch_space);
517
806k
      }
518
403k
      break;
519
0
    }
520
189k
    case Type::DCT4X8: {
521
189k
      float dcs[2] = {};
522
189k
      float block0 = coefficients[0];
523
189k
      float block1 = coefficients[8];
524
189k
      dcs[0] = block0 + block1;
525
189k
      dcs[1] = block0 - block1;
526
569k
      for (size_t y = 0; y < 2; y++) {
527
379k
        HWY_ALIGN float block[4 * 8];
528
379k
        block[0] = dcs[y];
529
1.89M
        for (size_t iy = 0; iy < 4; iy++) {
530
13.6M
          for (size_t ix = 0; ix < 8; ix++) {
531
12.1M
            if (ix == 0 && iy == 0) continue;
532
11.7M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
11.7M
          }
534
1.51M
        }
535
379k
        ComputeScaledIDCT<4, 8>()(
536
379k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
379k
            scratch_space);
538
379k
      }
539
189k
      break;
540
0
    }
541
3.50k
    case Type::DCT4X4: {
542
3.50k
      float dcs[4] = {};
543
3.50k
      float block00 = coefficients[0];
544
3.50k
      float block01 = coefficients[1];
545
3.50k
      float block10 = coefficients[8];
546
3.50k
      float block11 = coefficients[9];
547
3.50k
      dcs[0] = block00 + block01 + block10 + block11;
548
3.50k
      dcs[1] = block00 + block01 - block10 - block11;
549
3.50k
      dcs[2] = block00 - block01 + block10 - block11;
550
3.50k
      dcs[3] = block00 - block01 - block10 + block11;
551
10.5k
      for (size_t y = 0; y < 2; y++) {
552
21.0k
        for (size_t x = 0; x < 2; x++) {
553
14.0k
          HWY_ALIGN float block[4 * 4];
554
14.0k
          block[0] = dcs[y * 2 + x];
555
70.0k
          for (size_t iy = 0; iy < 4; iy++) {
556
280k
            for (size_t ix = 0; ix < 4; ix++) {
557
224k
              if (ix == 0 && iy == 0) continue;
558
210k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
210k
            }
560
56.0k
          }
561
14.0k
          ComputeScaledIDCT<4, 4>()(
562
14.0k
              block,
563
14.0k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
14.0k
              scratch_space);
565
14.0k
        }
566
7.00k
      }
567
3.50k
      break;
568
0
    }
569
5.57M
    case Type::DCT2X2: {
570
5.57M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
5.57M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
5.57M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
5.57M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
5.57M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
50.1M
      for (size_t y = 0; y < kBlockDim; y++) {
576
401M
        for (size_t x = 0; x < kBlockDim; x++) {
577
356M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
356M
        }
579
44.5M
      }
580
5.57M
      break;
581
0
    }
582
445k
    case Type::DCT16X16: {
583
445k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
445k
                                  scratch_space);
585
445k
      break;
586
0
    }
587
628k
    case Type::DCT16X8: {
588
628k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
628k
                                 scratch_space);
590
628k
      break;
591
0
    }
592
683k
    case Type::DCT8X16: {
593
683k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
683k
                                 scratch_space);
595
683k
      break;
596
0
    }
597
708
    case Type::DCT32X8: {
598
708
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
708
                                 scratch_space);
600
708
      break;
601
0
    }
602
117
    case Type::DCT8X32: {
603
117
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
117
                                 scratch_space);
605
117
      break;
606
0
    }
607
136k
    case Type::DCT32X16: {
608
136k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
136k
                                  scratch_space);
610
136k
      break;
611
0
    }
612
133k
    case Type::DCT16X32: {
613
133k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
133k
                                  scratch_space);
615
133k
      break;
616
0
    }
617
235k
    case Type::DCT32X32: {
618
235k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
235k
                                  scratch_space);
620
235k
      break;
621
0
    }
622
4.47M
    case Type::DCT: {
623
4.47M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
4.47M
                                scratch_space);
625
4.47M
      break;
626
0
    }
627
288k
    case Type::AFV0: {
628
288k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
288k
      break;
630
0
    }
631
154k
    case Type::AFV1: {
632
154k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
154k
      break;
634
0
    }
635
196k
    case Type::AFV2: {
636
196k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
196k
      break;
638
0
    }
639
202k
    case Type::AFV3: {
640
202k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
202k
      break;
642
0
    }
643
36.2k
    case Type::DCT64X32: {
644
36.2k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
36.2k
                                  scratch_space);
646
36.2k
      break;
647
0
    }
648
15.3k
    case Type::DCT32X64: {
649
15.3k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
15.3k
                                  scratch_space);
651
15.3k
      break;
652
0
    }
653
147k
    case Type::DCT64X64: {
654
147k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
147k
                                  scratch_space);
656
147k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
6
    case Type::DCT64X128: {
664
6
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
6
                                   scratch_space);
666
6
      break;
667
0
    }
668
18
    case Type::DCT128X128: {
669
18
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
18
                                    scratch_space);
671
18
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
16.9M
  }
689
16.9M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
17.2M
                                              float* JXL_RESTRICT scratch) {
695
17.2M
  using Type = AcStrategyType;
696
17.2M
  HWY_ALIGN float warm_block[4 * 4];
697
17.2M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
17.2M
  switch (strategy) {
699
628k
    case Type::DCT16X8: {
700
628k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
628k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
628k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
628k
      break;
704
0
    }
705
683k
    case Type::DCT8X16: {
706
683k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
683k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
683k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
683k
      break;
710
0
    }
711
445k
    case Type::DCT16X16: {
712
445k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
445k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
445k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
445k
      break;
716
0
    }
717
708
    case Type::DCT32X8: {
718
708
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
708
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
708
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
708
      break;
722
0
    }
723
117
    case Type::DCT8X32: {
724
117
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
117
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
117
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
117
      break;
728
0
    }
729
136k
    case Type::DCT32X16: {
730
136k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
136k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
136k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
136k
      break;
734
0
    }
735
133k
    case Type::DCT16X32: {
736
133k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
133k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
133k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
133k
      break;
740
0
    }
741
235k
    case Type::DCT32X32: {
742
235k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
235k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
235k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
235k
      break;
746
0
    }
747
36.2k
    case Type::DCT64X32: {
748
36.2k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
36.2k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
36.2k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
36.2k
      break;
752
0
    }
753
15.3k
    case Type::DCT32X64: {
754
15.3k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
15.3k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
15.3k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
15.3k
      break;
758
0
    }
759
147k
    case Type::DCT64X64: {
760
147k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
147k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
147k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
147k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
6
    case Type::DCT64X128: {
772
6
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
6
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
6
      break;
776
0
    }
777
18
    case Type::DCT128X128: {
778
18
      ReinterpretingDCT<
779
18
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
18
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
18
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
18
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.51M
    case Type::DCT:
806
10.0M
    case Type::DCT2X2:
807
10.0M
    case Type::DCT4X4:
808
10.2M
    case Type::DCT4X8:
809
10.6M
    case Type::DCT8X4:
810
10.9M
    case Type::AFV0:
811
11.1M
    case Type::AFV1:
812
11.3M
    case Type::AFV2:
813
11.5M
    case Type::AFV3:
814
14.8M
    case Type::IDENTITY:
815
14.8M
      llf[0] = dc[0];
816
14.8M
      break;
817
17.2M
  };
818
17.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
17.2M
                                              float* JXL_RESTRICT scratch) {
695
17.2M
  using Type = AcStrategyType;
696
17.2M
  HWY_ALIGN float warm_block[4 * 4];
697
17.2M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
17.2M
  switch (strategy) {
699
628k
    case Type::DCT16X8: {
700
628k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
628k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
628k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
628k
      break;
704
0
    }
705
683k
    case Type::DCT8X16: {
706
683k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
683k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
683k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
683k
      break;
710
0
    }
711
445k
    case Type::DCT16X16: {
712
445k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
445k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
445k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
445k
      break;
716
0
    }
717
708
    case Type::DCT32X8: {
718
708
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
708
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
708
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
708
      break;
722
0
    }
723
117
    case Type::DCT8X32: {
724
117
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
117
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
117
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
117
      break;
728
0
    }
729
136k
    case Type::DCT32X16: {
730
136k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
136k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
136k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
136k
      break;
734
0
    }
735
133k
    case Type::DCT16X32: {
736
133k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
133k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
133k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
133k
      break;
740
0
    }
741
235k
    case Type::DCT32X32: {
742
235k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
235k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
235k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
235k
      break;
746
0
    }
747
36.2k
    case Type::DCT64X32: {
748
36.2k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
36.2k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
36.2k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
36.2k
      break;
752
0
    }
753
15.3k
    case Type::DCT32X64: {
754
15.3k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
15.3k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
15.3k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
15.3k
      break;
758
0
    }
759
147k
    case Type::DCT64X64: {
760
147k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
147k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
147k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
147k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
6
    case Type::DCT64X128: {
772
6
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
6
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
6
      break;
776
0
    }
777
18
    case Type::DCT128X128: {
778
18
      ReinterpretingDCT<
779
18
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
18
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
18
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
18
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.51M
    case Type::DCT:
806
10.0M
    case Type::DCT2X2:
807
10.0M
    case Type::DCT4X4:
808
10.2M
    case Type::DCT4X8:
809
10.6M
    case Type::DCT8X4:
810
10.9M
    case Type::AFV0:
811
11.1M
    case Type::AFV1:
812
11.3M
    case Type::AFV2:
813
11.5M
    case Type::AFV3:
814
14.8M
    case Type::IDENTITY:
815
14.8M
      llf[0] = dc[0];
816
14.8M
      break;
817
17.2M
  };
818
17.2M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_