Coverage Report

Created: 2025-10-12 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
//
// Template parameters:
//   DCT_ROWS, DCT_COLS: dimensions of the destination DCT block; used here
//     only as the second argument of DCTTotalResampleScale.
//   LF_ROWS, LF_COLS: dimensions of the region written to output; must equal
//     ROWS and COLS respectively (enforced by the static_asserts below).
//   ROWS, COLS: dimensions of the DCT that is run on input.
//
// Arguments:
//   input, input_stride: source samples, wrapped in DCTFrom.
//   output, output_stride: destination; element (y, x) is stored at
//     output[y * output_stride + x].
//   block: intermediate buffer handed to ComputeScaledDCT and then read back
//     as the coefficients to rescale.
//   scratch_space: forwarded unchanged to ComputeScaledDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
2.36M
                                  float* JXL_RESTRICT scratch_space) {
41
2.36M
  static_assert(LF_ROWS == ROWS,
42
2.36M
                "ReinterpretingDCT should only be called with LF == N");
43
2.36M
  static_assert(LF_COLS == COLS,
44
2.36M
                "ReinterpretingDCT should only be called with LF == N");
45
2.36M
  // Run the ROWS x COLS DCT on input; its result is read from block below.
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
2.36M
                                 scratch_space);
47
2.36M
  // ROWS and COLS are template constants, so each instantiation only ever
  // takes one of the two branches.
  if (ROWS < COLS) {
48
1.81M
    // More columns than rows: block is indexed with a row pitch of COLS, the
    // row index y selects the ROWS->DCT_ROWS scale and the column index x the
    // COLS->DCT_COLS scale.
    for (size_t y = 0; y < LF_ROWS; y++) {
49
3.95M
      for (size_t x = 0; x < LF_COLS; x++) {
50
2.95M
        output[y * output_stride + x] =
51
2.95M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
2.95M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
2.95M
      }
54
1.00M
    }
55
1.54M
  } else {
56
5.25M
    // ROWS >= COLS: the roles of rows and columns are swapped. The outer loop
    // runs over LF_COLS, the inner over LF_ROWS, block is indexed with a row
    // pitch of ROWS, and the scale arguments swap accordingly, so output is
    // filled as LF_COLS rows of LF_ROWS values.
    // NOTE(review): presumably this mirrors the coefficient layout that
    // ComputeScaledDCT produces for non-wide blocks -- confirm in dct-inl.h.
    for (size_t y = 0; y < LF_COLS; y++) {
57
20.1M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
16.4M
        output[y * output_stride + x] =
59
16.4M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
16.4M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
16.4M
      }
62
3.70M
    }
63
1.54M
  }
64
2.36M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
613k
                                  float* JXL_RESTRICT scratch_space) {
41
613k
  static_assert(LF_ROWS == ROWS,
42
613k
                "ReinterpretingDCT should only be called with LF == N");
43
613k
  static_assert(LF_COLS == COLS,
44
613k
                "ReinterpretingDCT should only be called with LF == N");
45
613k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
613k
                                 scratch_space);
47
613k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
613k
  } else {
56
1.22M
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.83M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.22M
        output[y * output_stride + x] =
59
1.22M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.22M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.22M
      }
62
613k
    }
63
613k
  }
64
613k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
665k
                                  float* JXL_RESTRICT scratch_space) {
41
665k
  static_assert(LF_ROWS == ROWS,
42
665k
                "ReinterpretingDCT should only be called with LF == N");
43
665k
  static_assert(LF_COLS == COLS,
44
665k
                "ReinterpretingDCT should only be called with LF == N");
45
665k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
665k
                                 scratch_space);
47
665k
  if (ROWS < COLS) {
48
1.33M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.99M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.33M
        output[y * output_stride + x] =
51
1.33M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.33M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.33M
      }
54
665k
    }
55
665k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
665k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
424k
                                  float* JXL_RESTRICT scratch_space) {
41
424k
  static_assert(LF_ROWS == ROWS,
42
424k
                "ReinterpretingDCT should only be called with LF == N");
43
424k
  static_assert(LF_COLS == COLS,
44
424k
                "ReinterpretingDCT should only be called with LF == N");
45
424k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
424k
                                 scratch_space);
47
424k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
424k
  } else {
56
1.27M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.54M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.69M
        output[y * output_stride + x] =
59
1.69M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.69M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.69M
      }
62
848k
    }
63
424k
  }
64
424k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
714
                                  float* JXL_RESTRICT scratch_space) {
41
714
  static_assert(LF_ROWS == ROWS,
42
714
                "ReinterpretingDCT should only be called with LF == N");
43
714
  static_assert(LF_COLS == COLS,
44
714
                "ReinterpretingDCT should only be called with LF == N");
45
714
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
714
                                 scratch_space);
47
714
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
714
  } else {
56
1.42k
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.57k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.85k
        output[y * output_stride + x] =
59
2.85k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.85k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.85k
      }
62
714
    }
63
714
  }
64
714
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
108
                                  float* JXL_RESTRICT scratch_space) {
41
108
  static_assert(LF_ROWS == ROWS,
42
108
                "ReinterpretingDCT should only be called with LF == N");
43
108
  static_assert(LF_COLS == COLS,
44
108
                "ReinterpretingDCT should only be called with LF == N");
45
108
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
108
                                 scratch_space);
47
108
  if (ROWS < COLS) {
48
216
    for (size_t y = 0; y < LF_ROWS; y++) {
49
540
      for (size_t x = 0; x < LF_COLS; x++) {
50
432
        output[y * output_stride + x] =
51
432
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
432
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
432
      }
54
108
    }
55
108
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
108
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
137k
                                  float* JXL_RESTRICT scratch_space) {
41
137k
  static_assert(LF_ROWS == ROWS,
42
137k
                "ReinterpretingDCT should only be called with LF == N");
43
137k
  static_assert(LF_COLS == COLS,
44
137k
                "ReinterpretingDCT should only be called with LF == N");
45
137k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
137k
                                 scratch_space);
47
137k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
137k
  } else {
56
413k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.37M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.10M
        output[y * output_stride + x] =
59
1.10M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.10M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.10M
      }
62
275k
    }
63
137k
  }
64
137k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
134k
                                  float* JXL_RESTRICT scratch_space) {
41
134k
  static_assert(LF_ROWS == ROWS,
42
134k
                "ReinterpretingDCT should only be called with LF == N");
43
134k
  static_assert(LF_COLS == COLS,
44
134k
                "ReinterpretingDCT should only be called with LF == N");
45
134k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
134k
                                 scratch_space);
47
134k
  if (ROWS < COLS) {
48
402k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.34M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.07M
        output[y * output_stride + x] =
51
1.07M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.07M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.07M
      }
54
268k
    }
55
134k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
134k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
209k
                                  float* JXL_RESTRICT scratch_space) {
41
209k
  static_assert(LF_ROWS == ROWS,
42
209k
                "ReinterpretingDCT should only be called with LF == N");
43
209k
  static_assert(LF_COLS == COLS,
44
209k
                "ReinterpretingDCT should only be called with LF == N");
45
209k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
209k
                                 scratch_space);
47
209k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
209k
  } else {
56
1.04M
    for (size_t y = 0; y < LF_COLS; y++) {
57
4.18M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
3.34M
        output[y * output_stride + x] =
59
3.34M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
3.34M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
3.34M
      }
62
836k
    }
63
209k
  }
64
209k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
41.7k
                                  float* JXL_RESTRICT scratch_space) {
41
41.7k
  static_assert(LF_ROWS == ROWS,
42
41.7k
                "ReinterpretingDCT should only be called with LF == N");
43
41.7k
  static_assert(LF_COLS == COLS,
44
41.7k
                "ReinterpretingDCT should only be called with LF == N");
45
41.7k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
41.7k
                                 scratch_space);
47
41.7k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
41.7k
  } else {
56
208k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.50M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.33M
        output[y * output_stride + x] =
59
1.33M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.33M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.33M
      }
62
166k
    }
63
41.7k
  }
64
41.7k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
17.0k
                                  float* JXL_RESTRICT scratch_space) {
41
17.0k
  static_assert(LF_ROWS == ROWS,
42
17.0k
                "ReinterpretingDCT should only be called with LF == N");
43
17.0k
  static_assert(LF_COLS == COLS,
44
17.0k
                "ReinterpretingDCT should only be called with LF == N");
45
17.0k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
17.0k
                                 scratch_space);
47
17.0k
  if (ROWS < COLS) {
48
85.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
612k
      for (size_t x = 0; x < LF_COLS; x++) {
50
544k
        output[y * output_stride + x] =
51
544k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
544k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
544k
      }
54
68.0k
    }
55
17.0k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
17.0k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
120k
                                  float* JXL_RESTRICT scratch_space) {
41
120k
  static_assert(LF_ROWS == ROWS,
42
120k
                "ReinterpretingDCT should only be called with LF == N");
43
120k
  static_assert(LF_COLS == COLS,
44
120k
                "ReinterpretingDCT should only be called with LF == N");
45
120k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
120k
                                 scratch_space);
47
120k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
120k
  } else {
56
1.08M
    for (size_t y = 0; y < LF_COLS; y++) {
57
8.67M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
7.71M
        output[y * output_stride + x] =
59
7.71M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
7.71M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
7.71M
      }
62
964k
    }
63
120k
  }
64
120k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
9
                                  float* JXL_RESTRICT scratch_space) {
41
9
  static_assert(LF_ROWS == ROWS,
42
9
                "ReinterpretingDCT should only be called with LF == N");
43
9
  static_assert(LF_COLS == COLS,
44
9
                "ReinterpretingDCT should only be called with LF == N");
45
9
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
9
                                 scratch_space);
47
9
  if (ROWS < COLS) {
48
81
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.22k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.15k
        output[y * output_stride + x] =
51
1.15k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.15k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.15k
      }
54
72
    }
55
9
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
9
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6
                                  float* JXL_RESTRICT scratch_space) {
41
6
  static_assert(LF_ROWS == ROWS,
42
6
                "ReinterpretingDCT should only be called with LF == N");
43
6
  static_assert(LF_COLS == COLS,
44
6
                "ReinterpretingDCT should only be called with LF == N");
45
6
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6
                                 scratch_space);
47
6
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
6
  } else {
56
102
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.63k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.53k
        output[y * output_stride + x] =
59
1.53k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.53k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.53k
      }
62
96
    }
63
6
  }
64
6
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
49.7M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
49.7M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
49.7M
  static_assert(S % 2 == 0, "S should be even");
70
49.7M
  float temp[kDCTBlockSize];
71
49.7M
  constexpr size_t num_2x2 = S / 2;
72
165M
  for (size_t y = 0; y < num_2x2; y++) {
73
464M
    for (size_t x = 0; x < num_2x2; x++) {
74
348M
      float c00 = block[y * kBlockDim + x];
75
348M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
348M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
348M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
348M
      float r00 = c00 + c01 + c10 + c11;
79
348M
      float r01 = c00 + c01 - c10 - c11;
80
348M
      float r10 = c00 - c01 + c10 - c11;
81
348M
      float r11 = c00 - c01 - c10 + c11;
82
348M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
348M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
348M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
348M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
348M
    }
87
116M
  }
88
281M
  for (size_t y = 0; y < S; y++) {
89
1.62G
    for (size_t x = 0; x < S; x++) {
90
1.39G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.39G
    }
92
232M
  }
93
49.7M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
21.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
21.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
10.8M
      float c00 = block[y * kBlockDim + x];
75
10.8M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
10.8M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
10.8M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
10.8M
      float r00 = c00 + c01 + c10 + c11;
79
10.8M
      float r01 = c00 + c01 - c10 - c11;
80
10.8M
      float r10 = c00 - c01 + c10 - c11;
81
10.8M
      float r11 = c00 - c01 - c10 + c11;
82
10.8M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
10.8M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
10.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
10.8M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
10.8M
    }
87
10.8M
  }
88
32.4M
  for (size_t y = 0; y < S; y++) {
89
64.9M
    for (size_t x = 0; x < S; x++) {
90
43.3M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
43.3M
    }
92
21.6M
  }
93
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
32.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
64.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
43.3M
      float c00 = block[y * kBlockDim + x];
75
43.3M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
43.3M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
43.3M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
43.3M
      float r00 = c00 + c01 + c10 + c11;
79
43.3M
      float r01 = c00 + c01 - c10 - c11;
80
43.3M
      float r10 = c00 - c01 + c10 - c11;
81
43.3M
      float r11 = c00 - c01 - c10 + c11;
82
43.3M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
43.3M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
43.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
43.3M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
43.3M
    }
87
21.6M
  }
88
54.1M
  for (size_t y = 0; y < S; y++) {
89
216M
    for (size_t x = 0; x < S; x++) {
90
173M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
173M
    }
92
43.3M
  }
93
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
54.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
216M
    for (size_t x = 0; x < num_2x2; x++) {
74
173M
      float c00 = block[y * kBlockDim + x];
75
173M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
173M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
173M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
173M
      float r00 = c00 + c01 + c10 + c11;
79
173M
      float r01 = c00 + c01 - c10 - c11;
80
173M
      float r10 = c00 - c01 + c10 - c11;
81
173M
      float r11 = c00 - c01 - c10 + c11;
82
173M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
173M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
173M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
173M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
173M
    }
87
43.3M
  }
88
97.4M
  for (size_t y = 0; y < S; y++) {
89
779M
    for (size_t x = 0; x < S; x++) {
90
692M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
692M
    }
92
86.6M
  }
93
10.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.76M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.76M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.76M
  static_assert(S % 2 == 0, "S should be even");
70
5.76M
  float temp[kDCTBlockSize];
71
5.76M
  constexpr size_t num_2x2 = S / 2;
72
11.5M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
5.76M
      float c00 = block[y * kBlockDim + x];
75
5.76M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
5.76M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
5.76M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
5.76M
      float r00 = c00 + c01 + c10 + c11;
79
5.76M
      float r01 = c00 + c01 - c10 - c11;
80
5.76M
      float r10 = c00 - c01 + c10 - c11;
81
5.76M
      float r11 = c00 - c01 - c10 + c11;
82
5.76M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
5.76M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
5.76M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
5.76M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
5.76M
    }
87
5.76M
  }
88
17.2M
  for (size_t y = 0; y < S; y++) {
89
34.5M
    for (size_t x = 0; x < S; x++) {
90
23.0M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
23.0M
    }
92
11.5M
  }
93
5.76M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.76M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.76M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.76M
  static_assert(S % 2 == 0, "S should be even");
70
5.76M
  float temp[kDCTBlockSize];
71
5.76M
  constexpr size_t num_2x2 = S / 2;
72
17.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
34.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
23.0M
      float c00 = block[y * kBlockDim + x];
75
23.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
23.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
23.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
23.0M
      float r00 = c00 + c01 + c10 + c11;
79
23.0M
      float r01 = c00 + c01 - c10 - c11;
80
23.0M
      float r10 = c00 - c01 + c10 - c11;
81
23.0M
      float r11 = c00 - c01 - c10 + c11;
82
23.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
23.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
23.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
23.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
23.0M
    }
87
11.5M
  }
88
28.8M
  for (size_t y = 0; y < S; y++) {
89
115M
    for (size_t x = 0; x < S; x++) {
90
92.1M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
92.1M
    }
92
23.0M
  }
93
5.76M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
5.76M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
5.76M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
5.76M
  static_assert(S % 2 == 0, "S should be even");
70
5.76M
  float temp[kDCTBlockSize];
71
5.76M
  constexpr size_t num_2x2 = S / 2;
72
28.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
115M
    for (size_t x = 0; x < num_2x2; x++) {
74
92.1M
      float c00 = block[y * kBlockDim + x];
75
92.1M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
92.1M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
92.1M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
92.1M
      float r00 = c00 + c01 + c10 + c11;
79
92.1M
      float r01 = c00 + c01 - c10 - c11;
80
92.1M
      float r10 = c00 - c01 + c10 - c11;
81
92.1M
      float r11 = c00 - c01 - c10 + c11;
82
92.1M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
92.1M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
92.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
92.1M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
92.1M
    }
87
23.0M
  }
88
51.8M
  for (size_t y = 0; y < S; y++) {
89
414M
    for (size_t x = 0; x < S; x++) {
90
368M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
368M
    }
92
46.0M
  }
93
5.76M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
44.1M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
44.1M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
44.1M
      {
98
44.1M
          0.25,
99
44.1M
          0.25,
100
44.1M
          0.25,
101
44.1M
          0.25,
102
44.1M
          0.25,
103
44.1M
          0.25,
104
44.1M
          0.25,
105
44.1M
          0.25,
106
44.1M
          0.25,
107
44.1M
          0.25,
108
44.1M
          0.25,
109
44.1M
          0.25,
110
44.1M
          0.25,
111
44.1M
          0.25,
112
44.1M
          0.25,
113
44.1M
          0.25,
114
44.1M
      },
115
44.1M
      {
116
44.1M
          0.876902929799142f,
117
44.1M
          0.2206518106944235f,
118
44.1M
          -0.10140050393753763f,
119
44.1M
          -0.1014005039375375f,
120
44.1M
          0.2206518106944236f,
121
44.1M
          -0.10140050393753777f,
122
44.1M
          -0.10140050393753772f,
123
44.1M
          -0.10140050393753763f,
124
44.1M
          -0.10140050393753758f,
125
44.1M
          -0.10140050393753769f,
126
44.1M
          -0.1014005039375375f,
127
44.1M
          -0.10140050393753768f,
128
44.1M
          -0.10140050393753768f,
129
44.1M
          -0.10140050393753759f,
130
44.1M
          -0.10140050393753763f,
131
44.1M
          -0.10140050393753741f,
132
44.1M
      },
133
44.1M
      {
134
44.1M
          0.0,
135
44.1M
          0.0,
136
44.1M
          0.40670075830260755f,
137
44.1M
          0.44444816619734445f,
138
44.1M
          0.0,
139
44.1M
          0.0,
140
44.1M
          0.19574399372042936f,
141
44.1M
          0.2929100136981264f,
142
44.1M
          -0.40670075830260716f,
143
44.1M
          -0.19574399372042872f,
144
44.1M
          0.0,
145
44.1M
          0.11379074460448091f,
146
44.1M
          -0.44444816619734384f,
147
44.1M
          -0.29291001369812636f,
148
44.1M
          -0.1137907446044814f,
149
44.1M
          0.0,
150
44.1M
      },
151
44.1M
      {
152
44.1M
          0.0,
153
44.1M
          0.0,
154
44.1M
          -0.21255748058288748f,
155
44.1M
          0.3085497062849767f,
156
44.1M
          0.0,
157
44.1M
          0.4706702258572536f,
158
44.1M
          -0.1621205195722993f,
159
44.1M
          0.0,
160
44.1M
          -0.21255748058287047f,
161
44.1M
          -0.16212051957228327f,
162
44.1M
          -0.47067022585725277f,
163
44.1M
          -0.1464291867126764f,
164
44.1M
          0.3085497062849487f,
165
44.1M
          0.0,
166
44.1M
          -0.14642918671266536f,
167
44.1M
          0.4251149611657548f,
168
44.1M
      },
169
44.1M
      {
170
44.1M
          0.0,
171
44.1M
          -0.7071067811865474f,
172
44.1M
          0.0,
173
44.1M
          0.0,
174
44.1M
          0.7071067811865476f,
175
44.1M
          0.0,
176
44.1M
          0.0,
177
44.1M
          0.0,
178
44.1M
          0.0,
179
44.1M
          0.0,
180
44.1M
          0.0,
181
44.1M
          0.0,
182
44.1M
          0.0,
183
44.1M
          0.0,
184
44.1M
          0.0,
185
44.1M
          0.0,
186
44.1M
      },
187
44.1M
      {
188
44.1M
          -0.4105377591765233f,
189
44.1M
          0.6235485373547691f,
190
44.1M
          -0.06435071657946274f,
191
44.1M
          -0.06435071657946266f,
192
44.1M
          0.6235485373547694f,
193
44.1M
          -0.06435071657946284f,
194
44.1M
          -0.0643507165794628f,
195
44.1M
          -0.06435071657946274f,
196
44.1M
          -0.06435071657946272f,
197
44.1M
          -0.06435071657946279f,
198
44.1M
          -0.06435071657946266f,
199
44.1M
          -0.06435071657946277f,
200
44.1M
          -0.06435071657946277f,
201
44.1M
          -0.06435071657946273f,
202
44.1M
          -0.06435071657946274f,
203
44.1M
          -0.0643507165794626f,
204
44.1M
      },
205
44.1M
      {
206
44.1M
          0.0,
207
44.1M
          0.0,
208
44.1M
          -0.4517556589999482f,
209
44.1M
          0.15854503551840063f,
210
44.1M
          0.0,
211
44.1M
          -0.04038515160822202f,
212
44.1M
          0.0074182263792423875f,
213
44.1M
          0.39351034269210167f,
214
44.1M
          -0.45175565899994635f,
215
44.1M
          0.007418226379244351f,
216
44.1M
          0.1107416575309343f,
217
44.1M
          0.08298163094882051f,
218
44.1M
          0.15854503551839705f,
219
44.1M
          0.3935103426921022f,
220
44.1M
          0.0829816309488214f,
221
44.1M
          -0.45175565899994796f,
222
44.1M
      },
223
44.1M
      {
224
44.1M
          0.0,
225
44.1M
          0.0,
226
44.1M
          -0.304684750724869f,
227
44.1M
          0.5112616136591823f,
228
44.1M
          0.0,
229
44.1M
          0.0,
230
44.1M
          -0.290480129728998f,
231
44.1M
          -0.06578701549142804f,
232
44.1M
          0.304684750724884f,
233
44.1M
          0.2904801297290076f,
234
44.1M
          0.0,
235
44.1M
          -0.23889773523344604f,
236
44.1M
          -0.5112616136592012f,
237
44.1M
          0.06578701549142545f,
238
44.1M
          0.23889773523345467f,
239
44.1M
          0.0,
240
44.1M
      },
241
44.1M
      {
242
44.1M
          0.0,
243
44.1M
          0.0,
244
44.1M
          0.3017929516615495f,
245
44.1M
          0.25792362796341184f,
246
44.1M
          0.0,
247
44.1M
          0.16272340142866204f,
248
44.1M
          0.09520022653475037f,
249
44.1M
          0.0,
250
44.1M
          0.3017929516615503f,
251
44.1M
          0.09520022653475055f,
252
44.1M
          -0.16272340142866173f,
253
44.1M
          -0.35312385449816297f,
254
44.1M
          0.25792362796341295f,
255
44.1M
          0.0,
256
44.1M
          -0.3531238544981624f,
257
44.1M
          -0.6035859033230976f,
258
44.1M
      },
259
44.1M
      {
260
44.1M
          0.0,
261
44.1M
          0.0,
262
44.1M
          0.40824829046386274f,
263
44.1M
          0.0,
264
44.1M
          0.0,
265
44.1M
          0.0,
266
44.1M
          0.0,
267
44.1M
          -0.4082482904638628f,
268
44.1M
          -0.4082482904638635f,
269
44.1M
          0.0,
270
44.1M
          0.0,
271
44.1M
          -0.40824829046386296f,
272
44.1M
          0.0,
273
44.1M
          0.4082482904638634f,
274
44.1M
          0.408248290463863f,
275
44.1M
          0.0,
276
44.1M
      },
277
44.1M
      {
278
44.1M
          0.0,
279
44.1M
          0.0,
280
44.1M
          0.1747866975480809f,
281
44.1M
          0.0812611176717539f,
282
44.1M
          0.0,
283
44.1M
          0.0,
284
44.1M
          -0.3675398009862027f,
285
44.1M
          -0.307882213957909f,
286
44.1M
          -0.17478669754808135f,
287
44.1M
          0.3675398009862011f,
288
44.1M
          0.0,
289
44.1M
          0.4826689115059883f,
290
44.1M
          -0.08126111767175039f,
291
44.1M
          0.30788221395790305f,
292
44.1M
          -0.48266891150598584f,
293
44.1M
          0.0,
294
44.1M
      },
295
44.1M
      {
296
44.1M
          0.0,
297
44.1M
          0.0,
298
44.1M
          -0.21105601049335784f,
299
44.1M
          0.18567180916109802f,
300
44.1M
          0.0,
301
44.1M
          0.0,
302
44.1M
          0.49215859013738733f,
303
44.1M
          -0.38525013709251915f,
304
44.1M
          0.21105601049335806f,
305
44.1M
          -0.49215859013738905f,
306
44.1M
          0.0,
307
44.1M
          0.17419412659916217f,
308
44.1M
          -0.18567180916109904f,
309
44.1M
          0.3852501370925211f,
310
44.1M
          -0.1741941265991621f,
311
44.1M
          0.0,
312
44.1M
      },
313
44.1M
      {
314
44.1M
          0.0,
315
44.1M
          0.0,
316
44.1M
          -0.14266084808807264f,
317
44.1M
          -0.3416446842253372f,
318
44.1M
          0.0,
319
44.1M
          0.7367497537172237f,
320
44.1M
          0.24627107722075148f,
321
44.1M
          -0.08574019035519306f,
322
44.1M
          -0.14266084808807344f,
323
44.1M
          0.24627107722075137f,
324
44.1M
          0.14883399227113567f,
325
44.1M
          -0.04768680350229251f,
326
44.1M
          -0.3416446842253373f,
327
44.1M
          -0.08574019035519267f,
328
44.1M
          -0.047686803502292804f,
329
44.1M
          -0.14266084808807242f,
330
44.1M
      },
331
44.1M
      {
332
44.1M
          0.0,
333
44.1M
          0.0,
334
44.1M
          -0.13813540350758585f,
335
44.1M
          0.3302282550303788f,
336
44.1M
          0.0,
337
44.1M
          0.08755115000587084f,
338
44.1M
          -0.07946706605909573f,
339
44.1M
          -0.4613374887461511f,
340
44.1M
          -0.13813540350758294f,
341
44.1M
          -0.07946706605910261f,
342
44.1M
          0.49724647109535086f,
343
44.1M
          0.12538059448563663f,
344
44.1M
          0.3302282550303805f,
345
44.1M
          -0.4613374887461554f,
346
44.1M
          0.12538059448564315f,
347
44.1M
          -0.13813540350758452f,
348
44.1M
      },
349
44.1M
      {
350
44.1M
          0.0,
351
44.1M
          0.0,
352
44.1M
          -0.17437602599651067f,
353
44.1M
          0.0702790691196284f,
354
44.1M
          0.0,
355
44.1M
          -0.2921026642334881f,
356
44.1M
          0.3623817333531167f,
357
44.1M
          0.0,
358
44.1M
          -0.1743760259965108f,
359
44.1M
          0.36238173335311646f,
360
44.1M
          0.29210266423348785f,
361
44.1M
          -0.4326608024727445f,
362
44.1M
          0.07027906911962818f,
363
44.1M
          0.0,
364
44.1M
          -0.4326608024727457f,
365
44.1M
          0.34875205199302267f,
366
44.1M
      },
367
44.1M
      {
368
44.1M
          0.0,
369
44.1M
          0.0,
370
44.1M
          0.11354987314994337f,
371
44.1M
          -0.07417504595810355f,
372
44.1M
          0.0,
373
44.1M
          0.19402893032594343f,
374
44.1M
          -0.435190496523228f,
375
44.1M
          0.21918684838857466f,
376
44.1M
          0.11354987314994257f,
377
44.1M
          -0.4351904965232251f,
378
44.1M
          0.5550443808910661f,
379
44.1M
          -0.25468277124066463f,
380
44.1M
          -0.07417504595810233f,
381
44.1M
          0.2191868483885728f,
382
44.1M
          -0.25468277124066413f,
383
44.1M
          0.1135498731499429f,
384
44.1M
      },
385
44.1M
  };
386
387
44.1M
  const HWY_CAPPED(float, 16) d;
388
132M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
88.2M
    auto pixel = Zero(d);
390
1.50G
    for (size_t j = 0; j < 16; j++) {
391
1.41G
      auto cf = Set(d, coeffs[j]);
392
1.41G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.41G
      pixel = MulAdd(cf, basis, pixel);
394
1.41G
    }
395
88.2M
    Store(pixel, d, pixels + i);
396
88.2M
  }
397
44.1M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
43.3M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
43.3M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
43.3M
      {
98
43.3M
          0.25,
99
43.3M
          0.25,
100
43.3M
          0.25,
101
43.3M
          0.25,
102
43.3M
          0.25,
103
43.3M
          0.25,
104
43.3M
          0.25,
105
43.3M
          0.25,
106
43.3M
          0.25,
107
43.3M
          0.25,
108
43.3M
          0.25,
109
43.3M
          0.25,
110
43.3M
          0.25,
111
43.3M
          0.25,
112
43.3M
          0.25,
113
43.3M
          0.25,
114
43.3M
      },
115
43.3M
      {
116
43.3M
          0.876902929799142f,
117
43.3M
          0.2206518106944235f,
118
43.3M
          -0.10140050393753763f,
119
43.3M
          -0.1014005039375375f,
120
43.3M
          0.2206518106944236f,
121
43.3M
          -0.10140050393753777f,
122
43.3M
          -0.10140050393753772f,
123
43.3M
          -0.10140050393753763f,
124
43.3M
          -0.10140050393753758f,
125
43.3M
          -0.10140050393753769f,
126
43.3M
          -0.1014005039375375f,
127
43.3M
          -0.10140050393753768f,
128
43.3M
          -0.10140050393753768f,
129
43.3M
          -0.10140050393753759f,
130
43.3M
          -0.10140050393753763f,
131
43.3M
          -0.10140050393753741f,
132
43.3M
      },
133
43.3M
      {
134
43.3M
          0.0,
135
43.3M
          0.0,
136
43.3M
          0.40670075830260755f,
137
43.3M
          0.44444816619734445f,
138
43.3M
          0.0,
139
43.3M
          0.0,
140
43.3M
          0.19574399372042936f,
141
43.3M
          0.2929100136981264f,
142
43.3M
          -0.40670075830260716f,
143
43.3M
          -0.19574399372042872f,
144
43.3M
          0.0,
145
43.3M
          0.11379074460448091f,
146
43.3M
          -0.44444816619734384f,
147
43.3M
          -0.29291001369812636f,
148
43.3M
          -0.1137907446044814f,
149
43.3M
          0.0,
150
43.3M
      },
151
43.3M
      {
152
43.3M
          0.0,
153
43.3M
          0.0,
154
43.3M
          -0.21255748058288748f,
155
43.3M
          0.3085497062849767f,
156
43.3M
          0.0,
157
43.3M
          0.4706702258572536f,
158
43.3M
          -0.1621205195722993f,
159
43.3M
          0.0,
160
43.3M
          -0.21255748058287047f,
161
43.3M
          -0.16212051957228327f,
162
43.3M
          -0.47067022585725277f,
163
43.3M
          -0.1464291867126764f,
164
43.3M
          0.3085497062849487f,
165
43.3M
          0.0,
166
43.3M
          -0.14642918671266536f,
167
43.3M
          0.4251149611657548f,
168
43.3M
      },
169
43.3M
      {
170
43.3M
          0.0,
171
43.3M
          -0.7071067811865474f,
172
43.3M
          0.0,
173
43.3M
          0.0,
174
43.3M
          0.7071067811865476f,
175
43.3M
          0.0,
176
43.3M
          0.0,
177
43.3M
          0.0,
178
43.3M
          0.0,
179
43.3M
          0.0,
180
43.3M
          0.0,
181
43.3M
          0.0,
182
43.3M
          0.0,
183
43.3M
          0.0,
184
43.3M
          0.0,
185
43.3M
          0.0,
186
43.3M
      },
187
43.3M
      {
188
43.3M
          -0.4105377591765233f,
189
43.3M
          0.6235485373547691f,
190
43.3M
          -0.06435071657946274f,
191
43.3M
          -0.06435071657946266f,
192
43.3M
          0.6235485373547694f,
193
43.3M
          -0.06435071657946284f,
194
43.3M
          -0.0643507165794628f,
195
43.3M
          -0.06435071657946274f,
196
43.3M
          -0.06435071657946272f,
197
43.3M
          -0.06435071657946279f,
198
43.3M
          -0.06435071657946266f,
199
43.3M
          -0.06435071657946277f,
200
43.3M
          -0.06435071657946277f,
201
43.3M
          -0.06435071657946273f,
202
43.3M
          -0.06435071657946274f,
203
43.3M
          -0.0643507165794626f,
204
43.3M
      },
205
43.3M
      {
206
43.3M
          0.0,
207
43.3M
          0.0,
208
43.3M
          -0.4517556589999482f,
209
43.3M
          0.15854503551840063f,
210
43.3M
          0.0,
211
43.3M
          -0.04038515160822202f,
212
43.3M
          0.0074182263792423875f,
213
43.3M
          0.39351034269210167f,
214
43.3M
          -0.45175565899994635f,
215
43.3M
          0.007418226379244351f,
216
43.3M
          0.1107416575309343f,
217
43.3M
          0.08298163094882051f,
218
43.3M
          0.15854503551839705f,
219
43.3M
          0.3935103426921022f,
220
43.3M
          0.0829816309488214f,
221
43.3M
          -0.45175565899994796f,
222
43.3M
      },
223
43.3M
      {
224
43.3M
          0.0,
225
43.3M
          0.0,
226
43.3M
          -0.304684750724869f,
227
43.3M
          0.5112616136591823f,
228
43.3M
          0.0,
229
43.3M
          0.0,
230
43.3M
          -0.290480129728998f,
231
43.3M
          -0.06578701549142804f,
232
43.3M
          0.304684750724884f,
233
43.3M
          0.2904801297290076f,
234
43.3M
          0.0,
235
43.3M
          -0.23889773523344604f,
236
43.3M
          -0.5112616136592012f,
237
43.3M
          0.06578701549142545f,
238
43.3M
          0.23889773523345467f,
239
43.3M
          0.0,
240
43.3M
      },
241
43.3M
      {
242
43.3M
          0.0,
243
43.3M
          0.0,
244
43.3M
          0.3017929516615495f,
245
43.3M
          0.25792362796341184f,
246
43.3M
          0.0,
247
43.3M
          0.16272340142866204f,
248
43.3M
          0.09520022653475037f,
249
43.3M
          0.0,
250
43.3M
          0.3017929516615503f,
251
43.3M
          0.09520022653475055f,
252
43.3M
          -0.16272340142866173f,
253
43.3M
          -0.35312385449816297f,
254
43.3M
          0.25792362796341295f,
255
43.3M
          0.0,
256
43.3M
          -0.3531238544981624f,
257
43.3M
          -0.6035859033230976f,
258
43.3M
      },
259
43.3M
      {
260
43.3M
          0.0,
261
43.3M
          0.0,
262
43.3M
          0.40824829046386274f,
263
43.3M
          0.0,
264
43.3M
          0.0,
265
43.3M
          0.0,
266
43.3M
          0.0,
267
43.3M
          -0.4082482904638628f,
268
43.3M
          -0.4082482904638635f,
269
43.3M
          0.0,
270
43.3M
          0.0,
271
43.3M
          -0.40824829046386296f,
272
43.3M
          0.0,
273
43.3M
          0.4082482904638634f,
274
43.3M
          0.408248290463863f,
275
43.3M
          0.0,
276
43.3M
      },
277
43.3M
      {
278
43.3M
          0.0,
279
43.3M
          0.0,
280
43.3M
          0.1747866975480809f,
281
43.3M
          0.0812611176717539f,
282
43.3M
          0.0,
283
43.3M
          0.0,
284
43.3M
          -0.3675398009862027f,
285
43.3M
          -0.307882213957909f,
286
43.3M
          -0.17478669754808135f,
287
43.3M
          0.3675398009862011f,
288
43.3M
          0.0,
289
43.3M
          0.4826689115059883f,
290
43.3M
          -0.08126111767175039f,
291
43.3M
          0.30788221395790305f,
292
43.3M
          -0.48266891150598584f,
293
43.3M
          0.0,
294
43.3M
      },
295
43.3M
      {
296
43.3M
          0.0,
297
43.3M
          0.0,
298
43.3M
          -0.21105601049335784f,
299
43.3M
          0.18567180916109802f,
300
43.3M
          0.0,
301
43.3M
          0.0,
302
43.3M
          0.49215859013738733f,
303
43.3M
          -0.38525013709251915f,
304
43.3M
          0.21105601049335806f,
305
43.3M
          -0.49215859013738905f,
306
43.3M
          0.0,
307
43.3M
          0.17419412659916217f,
308
43.3M
          -0.18567180916109904f,
309
43.3M
          0.3852501370925211f,
310
43.3M
          -0.1741941265991621f,
311
43.3M
          0.0,
312
43.3M
      },
313
43.3M
      {
314
43.3M
          0.0,
315
43.3M
          0.0,
316
43.3M
          -0.14266084808807264f,
317
43.3M
          -0.3416446842253372f,
318
43.3M
          0.0,
319
43.3M
          0.7367497537172237f,
320
43.3M
          0.24627107722075148f,
321
43.3M
          -0.08574019035519306f,
322
43.3M
          -0.14266084808807344f,
323
43.3M
          0.24627107722075137f,
324
43.3M
          0.14883399227113567f,
325
43.3M
          -0.04768680350229251f,
326
43.3M
          -0.3416446842253373f,
327
43.3M
          -0.08574019035519267f,
328
43.3M
          -0.047686803502292804f,
329
43.3M
          -0.14266084808807242f,
330
43.3M
      },
331
43.3M
      {
332
43.3M
          0.0,
333
43.3M
          0.0,
334
43.3M
          -0.13813540350758585f,
335
43.3M
          0.3302282550303788f,
336
43.3M
          0.0,
337
43.3M
          0.08755115000587084f,
338
43.3M
          -0.07946706605909573f,
339
43.3M
          -0.4613374887461511f,
340
43.3M
          -0.13813540350758294f,
341
43.3M
          -0.07946706605910261f,
342
43.3M
          0.49724647109535086f,
343
43.3M
          0.12538059448563663f,
344
43.3M
          0.3302282550303805f,
345
43.3M
          -0.4613374887461554f,
346
43.3M
          0.12538059448564315f,
347
43.3M
          -0.13813540350758452f,
348
43.3M
      },
349
43.3M
      {
350
43.3M
          0.0,
351
43.3M
          0.0,
352
43.3M
          -0.17437602599651067f,
353
43.3M
          0.0702790691196284f,
354
43.3M
          0.0,
355
43.3M
          -0.2921026642334881f,
356
43.3M
          0.3623817333531167f,
357
43.3M
          0.0,
358
43.3M
          -0.1743760259965108f,
359
43.3M
          0.36238173335311646f,
360
43.3M
          0.29210266423348785f,
361
43.3M
          -0.4326608024727445f,
362
43.3M
          0.07027906911962818f,
363
43.3M
          0.0,
364
43.3M
          -0.4326608024727457f,
365
43.3M
          0.34875205199302267f,
366
43.3M
      },
367
43.3M
      {
368
43.3M
          0.0,
369
43.3M
          0.0,
370
43.3M
          0.11354987314994337f,
371
43.3M
          -0.07417504595810355f,
372
43.3M
          0.0,
373
43.3M
          0.19402893032594343f,
374
43.3M
          -0.435190496523228f,
375
43.3M
          0.21918684838857466f,
376
43.3M
          0.11354987314994257f,
377
43.3M
          -0.4351904965232251f,
378
43.3M
          0.5550443808910661f,
379
43.3M
          -0.25468277124066463f,
380
43.3M
          -0.07417504595810233f,
381
43.3M
          0.2191868483885728f,
382
43.3M
          -0.25468277124066413f,
383
43.3M
          0.1135498731499429f,
384
43.3M
      },
385
43.3M
  };
386
387
43.3M
  const HWY_CAPPED(float, 16) d;
388
129M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
86.6M
    auto pixel = Zero(d);
390
1.47G
    for (size_t j = 0; j < 16; j++) {
391
1.38G
      auto cf = Set(d, coeffs[j]);
392
1.38G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.38G
      pixel = MulAdd(cf, basis, pixel);
394
1.38G
    }
395
86.6M
    Store(pixel, d, pixels + i);
396
86.6M
  }
397
43.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
826k
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
826k
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
826k
      {
98
826k
          0.25,
99
826k
          0.25,
100
826k
          0.25,
101
826k
          0.25,
102
826k
          0.25,
103
826k
          0.25,
104
826k
          0.25,
105
826k
          0.25,
106
826k
          0.25,
107
826k
          0.25,
108
826k
          0.25,
109
826k
          0.25,
110
826k
          0.25,
111
826k
          0.25,
112
826k
          0.25,
113
826k
          0.25,
114
826k
      },
115
826k
      {
116
826k
          0.876902929799142f,
117
826k
          0.2206518106944235f,
118
826k
          -0.10140050393753763f,
119
826k
          -0.1014005039375375f,
120
826k
          0.2206518106944236f,
121
826k
          -0.10140050393753777f,
122
826k
          -0.10140050393753772f,
123
826k
          -0.10140050393753763f,
124
826k
          -0.10140050393753758f,
125
826k
          -0.10140050393753769f,
126
826k
          -0.1014005039375375f,
127
826k
          -0.10140050393753768f,
128
826k
          -0.10140050393753768f,
129
826k
          -0.10140050393753759f,
130
826k
          -0.10140050393753763f,
131
826k
          -0.10140050393753741f,
132
826k
      },
133
826k
      {
134
826k
          0.0,
135
826k
          0.0,
136
826k
          0.40670075830260755f,
137
826k
          0.44444816619734445f,
138
826k
          0.0,
139
826k
          0.0,
140
826k
          0.19574399372042936f,
141
826k
          0.2929100136981264f,
142
826k
          -0.40670075830260716f,
143
826k
          -0.19574399372042872f,
144
826k
          0.0,
145
826k
          0.11379074460448091f,
146
826k
          -0.44444816619734384f,
147
826k
          -0.29291001369812636f,
148
826k
          -0.1137907446044814f,
149
826k
          0.0,
150
826k
      },
151
826k
      {
152
826k
          0.0,
153
826k
          0.0,
154
826k
          -0.21255748058288748f,
155
826k
          0.3085497062849767f,
156
826k
          0.0,
157
826k
          0.4706702258572536f,
158
826k
          -0.1621205195722993f,
159
826k
          0.0,
160
826k
          -0.21255748058287047f,
161
826k
          -0.16212051957228327f,
162
826k
          -0.47067022585725277f,
163
826k
          -0.1464291867126764f,
164
826k
          0.3085497062849487f,
165
826k
          0.0,
166
826k
          -0.14642918671266536f,
167
826k
          0.4251149611657548f,
168
826k
      },
169
826k
      {
170
826k
          0.0,
171
826k
          -0.7071067811865474f,
172
826k
          0.0,
173
826k
          0.0,
174
826k
          0.7071067811865476f,
175
826k
          0.0,
176
826k
          0.0,
177
826k
          0.0,
178
826k
          0.0,
179
826k
          0.0,
180
826k
          0.0,
181
826k
          0.0,
182
826k
          0.0,
183
826k
          0.0,
184
826k
          0.0,
185
826k
          0.0,
186
826k
      },
187
826k
      {
188
826k
          -0.4105377591765233f,
189
826k
          0.6235485373547691f,
190
826k
          -0.06435071657946274f,
191
826k
          -0.06435071657946266f,
192
826k
          0.6235485373547694f,
193
826k
          -0.06435071657946284f,
194
826k
          -0.0643507165794628f,
195
826k
          -0.06435071657946274f,
196
826k
          -0.06435071657946272f,
197
826k
          -0.06435071657946279f,
198
826k
          -0.06435071657946266f,
199
826k
          -0.06435071657946277f,
200
826k
          -0.06435071657946277f,
201
826k
          -0.06435071657946273f,
202
826k
          -0.06435071657946274f,
203
826k
          -0.0643507165794626f,
204
826k
      },
205
826k
      {
206
826k
          0.0,
207
826k
          0.0,
208
826k
          -0.4517556589999482f,
209
826k
          0.15854503551840063f,
210
826k
          0.0,
211
826k
          -0.04038515160822202f,
212
826k
          0.0074182263792423875f,
213
826k
          0.39351034269210167f,
214
826k
          -0.45175565899994635f,
215
826k
          0.007418226379244351f,
216
826k
          0.1107416575309343f,
217
826k
          0.08298163094882051f,
218
826k
          0.15854503551839705f,
219
826k
          0.3935103426921022f,
220
826k
          0.0829816309488214f,
221
826k
          -0.45175565899994796f,
222
826k
      },
223
826k
      {
224
826k
          0.0,
225
826k
          0.0,
226
826k
          -0.304684750724869f,
227
826k
          0.5112616136591823f,
228
826k
          0.0,
229
826k
          0.0,
230
826k
          -0.290480129728998f,
231
826k
          -0.06578701549142804f,
232
826k
          0.304684750724884f,
233
826k
          0.2904801297290076f,
234
826k
          0.0,
235
826k
          -0.23889773523344604f,
236
826k
          -0.5112616136592012f,
237
826k
          0.06578701549142545f,
238
826k
          0.23889773523345467f,
239
826k
          0.0,
240
826k
      },
241
826k
      {
242
826k
          0.0,
243
826k
          0.0,
244
826k
          0.3017929516615495f,
245
826k
          0.25792362796341184f,
246
826k
          0.0,
247
826k
          0.16272340142866204f,
248
826k
          0.09520022653475037f,
249
826k
          0.0,
250
826k
          0.3017929516615503f,
251
826k
          0.09520022653475055f,
252
826k
          -0.16272340142866173f,
253
826k
          -0.35312385449816297f,
254
826k
          0.25792362796341295f,
255
826k
          0.0,
256
826k
          -0.3531238544981624f,
257
826k
          -0.6035859033230976f,
258
826k
      },
259
826k
      {
260
826k
          0.0,
261
826k
          0.0,
262
826k
          0.40824829046386274f,
263
826k
          0.0,
264
826k
          0.0,
265
826k
          0.0,
266
826k
          0.0,
267
826k
          -0.4082482904638628f,
268
826k
          -0.4082482904638635f,
269
826k
          0.0,
270
826k
          0.0,
271
826k
          -0.40824829046386296f,
272
826k
          0.0,
273
826k
          0.4082482904638634f,
274
826k
          0.408248290463863f,
275
826k
          0.0,
276
826k
      },
277
826k
      {
278
826k
          0.0,
279
826k
          0.0,
280
826k
          0.1747866975480809f,
281
826k
          0.0812611176717539f,
282
826k
          0.0,
283
826k
          0.0,
284
826k
          -0.3675398009862027f,
285
826k
          -0.307882213957909f,
286
826k
          -0.17478669754808135f,
287
826k
          0.3675398009862011f,
288
826k
          0.0,
289
826k
          0.4826689115059883f,
290
826k
          -0.08126111767175039f,
291
826k
          0.30788221395790305f,
292
826k
          -0.48266891150598584f,
293
826k
          0.0,
294
826k
      },
295
826k
      {
296
826k
          0.0,
297
826k
          0.0,
298
826k
          -0.21105601049335784f,
299
826k
          0.18567180916109802f,
300
826k
          0.0,
301
826k
          0.0,
302
826k
          0.49215859013738733f,
303
826k
          -0.38525013709251915f,
304
826k
          0.21105601049335806f,
305
826k
          -0.49215859013738905f,
306
826k
          0.0,
307
826k
          0.17419412659916217f,
308
826k
          -0.18567180916109904f,
309
826k
          0.3852501370925211f,
310
826k
          -0.1741941265991621f,
311
826k
          0.0,
312
826k
      },
313
826k
      {
314
826k
          0.0,
315
826k
          0.0,
316
826k
          -0.14266084808807264f,
317
826k
          -0.3416446842253372f,
318
826k
          0.0,
319
826k
          0.7367497537172237f,
320
826k
          0.24627107722075148f,
321
826k
          -0.08574019035519306f,
322
826k
          -0.14266084808807344f,
323
826k
          0.24627107722075137f,
324
826k
          0.14883399227113567f,
325
826k
          -0.04768680350229251f,
326
826k
          -0.3416446842253373f,
327
826k
          -0.08574019035519267f,
328
826k
          -0.047686803502292804f,
329
826k
          -0.14266084808807242f,
330
826k
      },
331
826k
      {
332
826k
          0.0,
333
826k
          0.0,
334
826k
          -0.13813540350758585f,
335
826k
          0.3302282550303788f,
336
826k
          0.0,
337
826k
          0.08755115000587084f,
338
826k
          -0.07946706605909573f,
339
826k
          -0.4613374887461511f,
340
826k
          -0.13813540350758294f,
341
826k
          -0.07946706605910261f,
342
826k
          0.49724647109535086f,
343
826k
          0.12538059448563663f,
344
826k
          0.3302282550303805f,
345
826k
          -0.4613374887461554f,
346
826k
          0.12538059448564315f,
347
826k
          -0.13813540350758452f,
348
826k
      },
349
826k
      {
350
826k
          0.0,
351
826k
          0.0,
352
826k
          -0.17437602599651067f,
353
826k
          0.0702790691196284f,
354
826k
          0.0,
355
826k
          -0.2921026642334881f,
356
826k
          0.3623817333531167f,
357
826k
          0.0,
358
826k
          -0.1743760259965108f,
359
826k
          0.36238173335311646f,
360
826k
          0.29210266423348785f,
361
826k
          -0.4326608024727445f,
362
826k
          0.07027906911962818f,
363
826k
          0.0,
364
826k
          -0.4326608024727457f,
365
826k
          0.34875205199302267f,
366
826k
      },
367
826k
      {
368
826k
          0.0,
369
826k
          0.0,
370
826k
          0.11354987314994337f,
371
826k
          -0.07417504595810355f,
372
826k
          0.0,
373
826k
          0.19402893032594343f,
374
826k
          -0.435190496523228f,
375
826k
          0.21918684838857466f,
376
826k
          0.11354987314994257f,
377
826k
          -0.4351904965232251f,
378
826k
          0.5550443808910661f,
379
826k
          -0.25468277124066463f,
380
826k
          -0.07417504595810233f,
381
826k
          0.2191868483885728f,
382
826k
          -0.25468277124066413f,
383
826k
          0.1135498731499429f,
384
826k
      },
385
826k
  };
386
387
826k
  const HWY_CAPPED(float, 16) d;
388
2.48M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
1.65M
    auto pixel = Zero(d);
390
28.1M
    for (size_t j = 0; j < 16; j++) {
391
26.4M
      auto cf = Set(d, coeffs[j]);
392
26.4M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
26.4M
      pixel = MulAdd(cf, basis, pixel);
394
26.4M
    }
395
1.65M
    Store(pixel, d, pixels + i);
396
1.65M
  }
397
826k
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
44.1M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
44.1M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
44.1M
  size_t afv_x = afv_kind & 1;
404
44.1M
  size_t afv_y = afv_kind / 2;
405
44.1M
  float dcs[3] = {};
406
44.1M
  float block00 = coefficients[0];
407
44.1M
  float block01 = coefficients[1];
408
44.1M
  float block10 = coefficients[8];
409
44.1M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
44.1M
  dcs[1] = (block00 + block10 - block01);
411
44.1M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
44.1M
  HWY_ALIGN float coeff[4 * 4];
414
44.1M
  coeff[0] = dcs[0];
415
220M
  for (size_t iy = 0; iy < 4; iy++) {
416
882M
    for (size_t ix = 0; ix < 4; ix++) {
417
706M
      if (ix == 0 && iy == 0) continue;
418
661M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
661M
    }
420
176M
  }
421
44.1M
  HWY_ALIGN float block[4 * 8];
422
44.1M
  AFVIDCT4x4(coeff, block);
423
220M
  for (size_t iy = 0; iy < 4; iy++) {
424
882M
    for (size_t ix = 0; ix < 4; ix++) {
425
706M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
706M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
706M
    }
428
176M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
44.1M
  block[0] = dcs[1];
431
220M
  for (size_t iy = 0; iy < 4; iy++) {
432
882M
    for (size_t ix = 0; ix < 4; ix++) {
433
706M
      if (ix == 0 && iy == 0) continue;
434
661M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
661M
    }
436
176M
  }
437
44.1M
  ComputeScaledIDCT<4, 4>()(
438
44.1M
      block,
439
44.1M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
44.1M
            pixels_stride),
441
44.1M
      scratch_space);
442
  // IDCT4x8.
443
44.1M
  block[0] = dcs[2];
444
220M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.58G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.41G
      if (ix == 0 && iy == 0) continue;
447
1.36G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.36G
    }
449
176M
  }
450
44.1M
  ComputeScaledIDCT<4, 8>()(
451
44.1M
      block,
452
44.1M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
44.1M
      scratch_space);
454
44.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
10.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
10.8M
  size_t afv_x = afv_kind & 1;
404
10.8M
  size_t afv_y = afv_kind / 2;
405
10.8M
  float dcs[3] = {};
406
10.8M
  float block00 = coefficients[0];
407
10.8M
  float block01 = coefficients[1];
408
10.8M
  float block10 = coefficients[8];
409
10.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
10.8M
  dcs[1] = (block00 + block10 - block01);
411
10.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
10.8M
  HWY_ALIGN float coeff[4 * 4];
414
10.8M
  coeff[0] = dcs[0];
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      if (ix == 0 && iy == 0) continue;
418
162M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
162M
    }
420
43.3M
  }
421
10.8M
  HWY_ALIGN float block[4 * 8];
422
10.8M
  AFVIDCT4x4(coeff, block);
423
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
424
216M
    for (size_t ix = 0; ix < 4; ix++) {
425
173M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
173M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
173M
    }
428
43.3M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
10.8M
  block[0] = dcs[1];
431
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
432
216M
    for (size_t ix = 0; ix < 4; ix++) {
433
173M
      if (ix == 0 && iy == 0) continue;
434
162M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
162M
    }
436
43.3M
  }
437
10.8M
  ComputeScaledIDCT<4, 4>()(
438
10.8M
      block,
439
10.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
10.8M
            pixels_stride),
441
10.8M
      scratch_space);
442
  // IDCT4x8.
443
10.8M
  block[0] = dcs[2];
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      if (ix == 0 && iy == 0) continue;
447
335M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
335M
    }
449
43.3M
  }
450
10.8M
  ComputeScaledIDCT<4, 8>()(
451
10.8M
      block,
452
10.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
10.8M
      scratch_space);
454
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
10.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
10.8M
  size_t afv_x = afv_kind & 1;
404
10.8M
  size_t afv_y = afv_kind / 2;
405
10.8M
  float dcs[3] = {};
406
10.8M
  float block00 = coefficients[0];
407
10.8M
  float block01 = coefficients[1];
408
10.8M
  float block10 = coefficients[8];
409
10.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
10.8M
  dcs[1] = (block00 + block10 - block01);
411
10.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
10.8M
  HWY_ALIGN float coeff[4 * 4];
414
10.8M
  coeff[0] = dcs[0];
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      if (ix == 0 && iy == 0) continue;
418
162M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
162M
    }
420
43.3M
  }
421
10.8M
  HWY_ALIGN float block[4 * 8];
422
10.8M
  AFVIDCT4x4(coeff, block);
423
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
424
216M
    for (size_t ix = 0; ix < 4; ix++) {
425
173M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
173M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
173M
    }
428
43.3M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
10.8M
  block[0] = dcs[1];
431
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
432
216M
    for (size_t ix = 0; ix < 4; ix++) {
433
173M
      if (ix == 0 && iy == 0) continue;
434
162M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
162M
    }
436
43.3M
  }
437
10.8M
  ComputeScaledIDCT<4, 4>()(
438
10.8M
      block,
439
10.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
10.8M
            pixels_stride),
441
10.8M
      scratch_space);
442
  // IDCT4x8.
443
10.8M
  block[0] = dcs[2];
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      if (ix == 0 && iy == 0) continue;
447
335M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
335M
    }
449
43.3M
  }
450
10.8M
  ComputeScaledIDCT<4, 8>()(
451
10.8M
      block,
452
10.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
10.8M
      scratch_space);
454
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
10.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
10.8M
  size_t afv_x = afv_kind & 1;
404
10.8M
  size_t afv_y = afv_kind / 2;
405
10.8M
  float dcs[3] = {};
406
10.8M
  float block00 = coefficients[0];
407
10.8M
  float block01 = coefficients[1];
408
10.8M
  float block10 = coefficients[8];
409
10.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
10.8M
  dcs[1] = (block00 + block10 - block01);
411
10.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
10.8M
  HWY_ALIGN float coeff[4 * 4];
414
10.8M
  coeff[0] = dcs[0];
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      if (ix == 0 && iy == 0) continue;
418
162M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
162M
    }
420
43.3M
  }
421
10.8M
  HWY_ALIGN float block[4 * 8];
422
10.8M
  AFVIDCT4x4(coeff, block);
423
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
424
216M
    for (size_t ix = 0; ix < 4; ix++) {
425
173M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
173M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
173M
    }
428
43.3M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
10.8M
  block[0] = dcs[1];
431
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
432
216M
    for (size_t ix = 0; ix < 4; ix++) {
433
173M
      if (ix == 0 && iy == 0) continue;
434
162M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
162M
    }
436
43.3M
  }
437
10.8M
  ComputeScaledIDCT<4, 4>()(
438
10.8M
      block,
439
10.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
10.8M
            pixels_stride),
441
10.8M
      scratch_space);
442
  // IDCT4x8.
443
10.8M
  block[0] = dcs[2];
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      if (ix == 0 && iy == 0) continue;
447
335M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
335M
    }
449
43.3M
  }
450
10.8M
  ComputeScaledIDCT<4, 8>()(
451
10.8M
      block,
452
10.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
10.8M
      scratch_space);
454
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
10.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
10.8M
  size_t afv_x = afv_kind & 1;
404
10.8M
  size_t afv_y = afv_kind / 2;
405
10.8M
  float dcs[3] = {};
406
10.8M
  float block00 = coefficients[0];
407
10.8M
  float block01 = coefficients[1];
408
10.8M
  float block10 = coefficients[8];
409
10.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
10.8M
  dcs[1] = (block00 + block10 - block01);
411
10.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
10.8M
  HWY_ALIGN float coeff[4 * 4];
414
10.8M
  coeff[0] = dcs[0];
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      if (ix == 0 && iy == 0) continue;
418
162M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
162M
    }
420
43.3M
  }
421
10.8M
  HWY_ALIGN float block[4 * 8];
422
10.8M
  AFVIDCT4x4(coeff, block);
423
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
424
216M
    for (size_t ix = 0; ix < 4; ix++) {
425
173M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
173M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
173M
    }
428
43.3M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
10.8M
  block[0] = dcs[1];
431
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
432
216M
    for (size_t ix = 0; ix < 4; ix++) {
433
173M
      if (ix == 0 && iy == 0) continue;
434
162M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
162M
    }
436
43.3M
  }
437
10.8M
  ComputeScaledIDCT<4, 4>()(
438
10.8M
      block,
439
10.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
10.8M
            pixels_stride),
441
10.8M
      scratch_space);
442
  // IDCT4x8.
443
10.8M
  block[0] = dcs[2];
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      if (ix == 0 && iy == 0) continue;
447
335M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
335M
    }
449
43.3M
  }
450
10.8M
  ComputeScaledIDCT<4, 8>()(
451
10.8M
      block,
452
10.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
10.8M
      scratch_space);
454
10.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
281k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
281k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
281k
  size_t afv_x = afv_kind & 1;
404
281k
  size_t afv_y = afv_kind / 2;
405
281k
  float dcs[3] = {};
406
281k
  float block00 = coefficients[0];
407
281k
  float block01 = coefficients[1];
408
281k
  float block10 = coefficients[8];
409
281k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
281k
  dcs[1] = (block00 + block10 - block01);
411
281k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
281k
  HWY_ALIGN float coeff[4 * 4];
414
281k
  coeff[0] = dcs[0];
415
1.40M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.62M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.49M
      if (ix == 0 && iy == 0) continue;
418
4.21M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
4.21M
    }
420
1.12M
  }
421
281k
  HWY_ALIGN float block[4 * 8];
422
281k
  AFVIDCT4x4(coeff, block);
423
1.40M
  for (size_t iy = 0; iy < 4; iy++) {
424
5.62M
    for (size_t ix = 0; ix < 4; ix++) {
425
4.49M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
4.49M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
4.49M
    }
428
1.12M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
281k
  block[0] = dcs[1];
431
1.40M
  for (size_t iy = 0; iy < 4; iy++) {
432
5.62M
    for (size_t ix = 0; ix < 4; ix++) {
433
4.49M
      if (ix == 0 && iy == 0) continue;
434
4.21M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
4.21M
    }
436
1.12M
  }
437
281k
  ComputeScaledIDCT<4, 4>()(
438
281k
      block,
439
281k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
281k
            pixels_stride),
441
281k
      scratch_space);
442
  // IDCT4x8.
443
281k
  block[0] = dcs[2];
444
1.40M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.99M
      if (ix == 0 && iy == 0) continue;
447
8.71M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
8.71M
    }
449
1.12M
  }
450
281k
  ComputeScaledIDCT<4, 8>()(
451
281k
      block,
452
281k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
281k
      scratch_space);
454
281k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
158k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
158k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
158k
  size_t afv_x = afv_kind & 1;
404
158k
  size_t afv_y = afv_kind / 2;
405
158k
  float dcs[3] = {};
406
158k
  float block00 = coefficients[0];
407
158k
  float block01 = coefficients[1];
408
158k
  float block10 = coefficients[8];
409
158k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
158k
  dcs[1] = (block00 + block10 - block01);
411
158k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
158k
  HWY_ALIGN float coeff[4 * 4];
414
158k
  coeff[0] = dcs[0];
415
792k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.16M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.53M
      if (ix == 0 && iy == 0) continue;
418
2.37M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.37M
    }
420
633k
  }
421
158k
  HWY_ALIGN float block[4 * 8];
422
158k
  AFVIDCT4x4(coeff, block);
423
792k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.16M
    for (size_t ix = 0; ix < 4; ix++) {
425
2.53M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
2.53M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
2.53M
    }
428
633k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
158k
  block[0] = dcs[1];
431
792k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.16M
    for (size_t ix = 0; ix < 4; ix++) {
433
2.53M
      if (ix == 0 && iy == 0) continue;
434
2.37M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.37M
    }
436
633k
  }
437
158k
  ComputeScaledIDCT<4, 4>()(
438
158k
      block,
439
158k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
158k
            pixels_stride),
441
158k
      scratch_space);
442
  // IDCT4x8.
443
158k
  block[0] = dcs[2];
444
792k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.70M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.07M
      if (ix == 0 && iy == 0) continue;
447
4.91M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
4.91M
    }
449
633k
  }
450
158k
  ComputeScaledIDCT<4, 8>()(
451
158k
      block,
452
158k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
158k
      scratch_space);
454
158k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
197k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
197k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
197k
  size_t afv_x = afv_kind & 1;
404
197k
  size_t afv_y = afv_kind / 2;
405
197k
  float dcs[3] = {};
406
197k
  float block00 = coefficients[0];
407
197k
  float block01 = coefficients[1];
408
197k
  float block10 = coefficients[8];
409
197k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
197k
  dcs[1] = (block00 + block10 - block01);
411
197k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
197k
  HWY_ALIGN float coeff[4 * 4];
414
197k
  coeff[0] = dcs[0];
415
985k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.94M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.15M
      if (ix == 0 && iy == 0) continue;
418
2.95M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.95M
    }
420
788k
  }
421
197k
  HWY_ALIGN float block[4 * 8];
422
197k
  AFVIDCT4x4(coeff, block);
423
985k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.94M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.15M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.15M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.15M
    }
428
788k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
197k
  block[0] = dcs[1];
431
985k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.94M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.15M
      if (ix == 0 && iy == 0) continue;
434
2.95M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.95M
    }
436
788k
  }
437
197k
  ComputeScaledIDCT<4, 4>()(
438
197k
      block,
439
197k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
197k
            pixels_stride),
441
197k
      scratch_space);
442
  // IDCT4x8.
443
197k
  block[0] = dcs[2];
444
985k
  for (size_t iy = 0; iy < 4; iy++) {
445
7.09M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.30M
      if (ix == 0 && iy == 0) continue;
447
6.11M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.11M
    }
449
788k
  }
450
197k
  ComputeScaledIDCT<4, 8>()(
451
197k
      block,
452
197k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
197k
      scratch_space);
454
197k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
190k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
190k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
190k
  size_t afv_x = afv_kind & 1;
404
190k
  size_t afv_y = afv_kind / 2;
405
190k
  float dcs[3] = {};
406
190k
  float block00 = coefficients[0];
407
190k
  float block01 = coefficients[1];
408
190k
  float block10 = coefficients[8];
409
190k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
190k
  dcs[1] = (block00 + block10 - block01);
411
190k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
190k
  HWY_ALIGN float coeff[4 * 4];
414
190k
  coeff[0] = dcs[0];
415
950k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.80M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.04M
      if (ix == 0 && iy == 0) continue;
418
2.85M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.85M
    }
420
760k
  }
421
190k
  HWY_ALIGN float block[4 * 8];
422
190k
  AFVIDCT4x4(coeff, block);
423
950k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.80M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.04M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.04M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.04M
    }
428
760k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
190k
  block[0] = dcs[1];
431
950k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.80M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.04M
      if (ix == 0 && iy == 0) continue;
434
2.85M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.85M
    }
436
760k
  }
437
190k
  ComputeScaledIDCT<4, 4>()(
438
190k
      block,
439
190k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
190k
            pixels_stride),
441
190k
      scratch_space);
442
  // IDCT4x8.
443
190k
  block[0] = dcs[2];
444
950k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.84M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.08M
      if (ix == 0 && iy == 0) continue;
447
5.89M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
5.89M
    }
449
760k
  }
450
190k
  ComputeScaledIDCT<4, 8>()(
451
190k
      block,
452
190k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
190k
      scratch_space);
454
190k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
152M
                                        float* scratch_space) {
461
152M
  using Type = AcStrategyType;
462
152M
  switch (strategy) {
463
13.6M
    case Type::IDENTITY: {
464
13.6M
      float dcs[4] = {};
465
13.6M
      float block00 = coefficients[0];
466
13.6M
      float block01 = coefficients[1];
467
13.6M
      float block10 = coefficients[8];
468
13.6M
      float block11 = coefficients[9];
469
13.6M
      dcs[0] = block00 + block01 + block10 + block11;
470
13.6M
      dcs[1] = block00 + block01 - block10 - block11;
471
13.6M
      dcs[2] = block00 - block01 + block10 - block11;
472
13.6M
      dcs[3] = block00 - block01 - block10 + block11;
473
40.8M
      for (size_t y = 0; y < 2; y++) {
474
81.6M
        for (size_t x = 0; x < 2; x++) {
475
54.4M
          float block_dc = dcs[y * 2 + x];
476
54.4M
          float residual_sum = 0;
477
272M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.08G
            for (size_t ix = 0; ix < 4; ix++) {
479
871M
              if (ix == 0 && iy == 0) continue;
480
816M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
816M
            }
482
217M
          }
483
54.4M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
54.4M
              block_dc - residual_sum * (1.0f / 16);
485
272M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.08G
            for (size_t ix = 0; ix < 4; ix++) {
487
871M
              if (ix == 1 && iy == 1) continue;
488
816M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
816M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
816M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
816M
            }
492
217M
          }
493
54.4M
          pixels[y * 4 * pixels_stride + x * 4] =
494
54.4M
              coefficients[(y + 2) * 8 + x + 2] +
495
54.4M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
54.4M
        }
497
27.2M
      }
498
13.6M
      break;
499
0
    }
500
11.2M
    case Type::DCT8X4: {
501
11.2M
      float dcs[2] = {};
502
11.2M
      float block0 = coefficients[0];
503
11.2M
      float block1 = coefficients[8];
504
11.2M
      dcs[0] = block0 + block1;
505
11.2M
      dcs[1] = block0 - block1;
506
33.6M
      for (size_t x = 0; x < 2; x++) {
507
22.4M
        HWY_ALIGN float block[4 * 8];
508
22.4M
        block[0] = dcs[x];
509
112M
        for (size_t iy = 0; iy < 4; iy++) {
510
808M
          for (size_t ix = 0; ix < 8; ix++) {
511
718M
            if (ix == 0 && iy == 0) continue;
512
695M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
695M
          }
514
89.7M
        }
515
22.4M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
22.4M
                                  scratch_space);
517
22.4M
      }
518
11.2M
      break;
519
0
    }
520
10.9M
    case Type::DCT4X8: {
521
10.9M
      float dcs[2] = {};
522
10.9M
      float block0 = coefficients[0];
523
10.9M
      float block1 = coefficients[8];
524
10.9M
      dcs[0] = block0 + block1;
525
10.9M
      dcs[1] = block0 - block1;
526
32.9M
      for (size_t y = 0; y < 2; y++) {
527
21.9M
        HWY_ALIGN float block[4 * 8];
528
21.9M
        block[0] = dcs[y];
529
109M
        for (size_t iy = 0; iy < 4; iy++) {
530
791M
          for (size_t ix = 0; ix < 8; ix++) {
531
703M
            if (ix == 0 && iy == 0) continue;
532
681M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
681M
          }
534
87.9M
        }
535
21.9M
        ComputeScaledIDCT<4, 8>()(
536
21.9M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
21.9M
            scratch_space);
538
21.9M
      }
539
10.9M
      break;
540
0
    }
541
10.8M
    case Type::DCT4X4: {
542
10.8M
      float dcs[4] = {};
543
10.8M
      float block00 = coefficients[0];
544
10.8M
      float block01 = coefficients[1];
545
10.8M
      float block10 = coefficients[8];
546
10.8M
      float block11 = coefficients[9];
547
10.8M
      dcs[0] = block00 + block01 + block10 + block11;
548
10.8M
      dcs[1] = block00 + block01 - block10 - block11;
549
10.8M
      dcs[2] = block00 - block01 + block10 - block11;
550
10.8M
      dcs[3] = block00 - block01 - block10 + block11;
551
32.4M
      for (size_t y = 0; y < 2; y++) {
552
64.9M
        for (size_t x = 0; x < 2; x++) {
553
43.3M
          HWY_ALIGN float block[4 * 4];
554
43.3M
          block[0] = dcs[y * 2 + x];
555
216M
          for (size_t iy = 0; iy < 4; iy++) {
556
866M
            for (size_t ix = 0; ix < 4; ix++) {
557
693M
              if (ix == 0 && iy == 0) continue;
558
649M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
649M
            }
560
173M
          }
561
43.3M
          ComputeScaledIDCT<4, 4>()(
562
43.3M
              block,
563
43.3M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
43.3M
              scratch_space);
565
43.3M
        }
566
21.6M
      }
567
10.8M
      break;
568
0
    }
569
16.5M
    case Type::DCT2X2: {
570
16.5M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
16.5M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
16.5M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
16.5M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
16.5M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
149M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.19G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.06G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.06G
        }
579
132M
      }
580
16.5M
      break;
581
0
    }
582
4.94M
    case Type::DCT16X16: {
583
4.94M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.94M
                                  scratch_space);
585
4.94M
      break;
586
0
    }
587
9.53M
    case Type::DCT16X8: {
588
9.53M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
9.53M
                                 scratch_space);
590
9.53M
      break;
591
0
    }
592
9.56M
    case Type::DCT8X16: {
593
9.56M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
9.56M
                                 scratch_space);
595
9.56M
      break;
596
0
    }
597
714
    case Type::DCT32X8: {
598
714
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
714
                                 scratch_space);
600
714
      break;
601
0
    }
602
108
    case Type::DCT8X32: {
603
108
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
108
                                 scratch_space);
605
108
      break;
606
0
    }
607
1.88M
    case Type::DCT32X16: {
608
1.88M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.88M
                                  scratch_space);
610
1.88M
      break;
611
0
    }
612
1.85M
    case Type::DCT16X32: {
613
1.85M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.85M
                                  scratch_space);
615
1.85M
      break;
616
0
    }
617
1.09M
    case Type::DCT32X32: {
618
1.09M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.09M
                                  scratch_space);
620
1.09M
      break;
621
0
    }
622
15.0M
    case Type::DCT: {
623
15.0M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
15.0M
                                scratch_space);
625
15.0M
      break;
626
0
    }
627
11.1M
    case Type::AFV0: {
628
11.1M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
11.1M
      break;
630
0
    }
631
10.9M
    case Type::AFV1: {
632
10.9M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
10.9M
      break;
634
0
    }
635
11.0M
    case Type::AFV2: {
636
11.0M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
11.0M
      break;
638
0
    }
639
11.0M
    case Type::AFV3: {
640
11.0M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
11.0M
      break;
642
0
    }
643
567k
    case Type::DCT64X32: {
644
567k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
567k
                                  scratch_space);
646
567k
      break;
647
0
    }
648
338k
    case Type::DCT32X64: {
649
338k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
338k
                                  scratch_space);
651
338k
      break;
652
0
    }
653
269k
    case Type::DCT64X64: {
654
269k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
269k
                                  scratch_space);
656
269k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
152M
  }
689
152M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
135M
                                        float* scratch_space) {
461
135M
  using Type = AcStrategyType;
462
135M
  switch (strategy) {
463
10.8M
    case Type::IDENTITY: {
464
10.8M
      float dcs[4] = {};
465
10.8M
      float block00 = coefficients[0];
466
10.8M
      float block01 = coefficients[1];
467
10.8M
      float block10 = coefficients[8];
468
10.8M
      float block11 = coefficients[9];
469
10.8M
      dcs[0] = block00 + block01 + block10 + block11;
470
10.8M
      dcs[1] = block00 + block01 - block10 - block11;
471
10.8M
      dcs[2] = block00 - block01 + block10 - block11;
472
10.8M
      dcs[3] = block00 - block01 - block10 + block11;
473
32.4M
      for (size_t y = 0; y < 2; y++) {
474
64.9M
        for (size_t x = 0; x < 2; x++) {
475
43.3M
          float block_dc = dcs[y * 2 + x];
476
43.3M
          float residual_sum = 0;
477
216M
          for (size_t iy = 0; iy < 4; iy++) {
478
866M
            for (size_t ix = 0; ix < 4; ix++) {
479
692M
              if (ix == 0 && iy == 0) continue;
480
649M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
649M
            }
482
173M
          }
483
43.3M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
43.3M
              block_dc - residual_sum * (1.0f / 16);
485
216M
          for (size_t iy = 0; iy < 4; iy++) {
486
866M
            for (size_t ix = 0; ix < 4; ix++) {
487
692M
              if (ix == 1 && iy == 1) continue;
488
649M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
649M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
649M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
649M
            }
492
173M
          }
493
43.3M
          pixels[y * 4 * pixels_stride + x * 4] =
494
43.3M
              coefficients[(y + 2) * 8 + x + 2] +
495
43.3M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
43.3M
        }
497
21.6M
      }
498
10.8M
      break;
499
0
    }
500
10.8M
    case Type::DCT8X4: {
501
10.8M
      float dcs[2] = {};
502
10.8M
      float block0 = coefficients[0];
503
10.8M
      float block1 = coefficients[8];
504
10.8M
      dcs[0] = block0 + block1;
505
10.8M
      dcs[1] = block0 - block1;
506
32.4M
      for (size_t x = 0; x < 2; x++) {
507
21.6M
        HWY_ALIGN float block[4 * 8];
508
21.6M
        block[0] = dcs[x];
509
108M
        for (size_t iy = 0; iy < 4; iy++) {
510
779M
          for (size_t ix = 0; ix < 8; ix++) {
511
692M
            if (ix == 0 && iy == 0) continue;
512
671M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
671M
          }
514
86.6M
        }
515
21.6M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
21.6M
                                  scratch_space);
517
21.6M
      }
518
10.8M
      break;
519
0
    }
520
10.8M
    case Type::DCT4X8: {
521
10.8M
      float dcs[2] = {};
522
10.8M
      float block0 = coefficients[0];
523
10.8M
      float block1 = coefficients[8];
524
10.8M
      dcs[0] = block0 + block1;
525
10.8M
      dcs[1] = block0 - block1;
526
32.4M
      for (size_t y = 0; y < 2; y++) {
527
21.6M
        HWY_ALIGN float block[4 * 8];
528
21.6M
        block[0] = dcs[y];
529
108M
        for (size_t iy = 0; iy < 4; iy++) {
530
779M
          for (size_t ix = 0; ix < 8; ix++) {
531
692M
            if (ix == 0 && iy == 0) continue;
532
671M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
671M
          }
534
86.6M
        }
535
21.6M
        ComputeScaledIDCT<4, 8>()(
536
21.6M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
21.6M
            scratch_space);
538
21.6M
      }
539
10.8M
      break;
540
0
    }
541
10.8M
    case Type::DCT4X4: {
542
10.8M
      float dcs[4] = {};
543
10.8M
      float block00 = coefficients[0];
544
10.8M
      float block01 = coefficients[1];
545
10.8M
      float block10 = coefficients[8];
546
10.8M
      float block11 = coefficients[9];
547
10.8M
      dcs[0] = block00 + block01 + block10 + block11;
548
10.8M
      dcs[1] = block00 + block01 - block10 - block11;
549
10.8M
      dcs[2] = block00 - block01 + block10 - block11;
550
10.8M
      dcs[3] = block00 - block01 - block10 + block11;
551
32.4M
      for (size_t y = 0; y < 2; y++) {
552
64.9M
        for (size_t x = 0; x < 2; x++) {
553
43.3M
          HWY_ALIGN float block[4 * 4];
554
43.3M
          block[0] = dcs[y * 2 + x];
555
216M
          for (size_t iy = 0; iy < 4; iy++) {
556
866M
            for (size_t ix = 0; ix < 4; ix++) {
557
692M
              if (ix == 0 && iy == 0) continue;
558
649M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
649M
            }
560
173M
          }
561
43.3M
          ComputeScaledIDCT<4, 4>()(
562
43.3M
              block,
563
43.3M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
43.3M
              scratch_space);
565
43.3M
        }
566
21.6M
      }
567
10.8M
      break;
568
0
    }
569
10.8M
    case Type::DCT2X2: {
570
10.8M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
10.8M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
10.8M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
10.8M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
10.8M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
97.4M
      for (size_t y = 0; y < kBlockDim; y++) {
576
779M
        for (size_t x = 0; x < kBlockDim; x++) {
577
692M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
692M
        }
579
86.6M
      }
580
10.8M
      break;
581
0
    }
582
4.51M
    case Type::DCT16X16: {
583
4.51M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
4.51M
                                  scratch_space);
585
4.51M
      break;
586
0
    }
587
8.91M
    case Type::DCT16X8: {
588
8.91M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
8.91M
                                 scratch_space);
590
8.91M
      break;
591
0
    }
592
8.89M
    case Type::DCT8X16: {
593
8.89M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
8.89M
                                 scratch_space);
595
8.89M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
1.74M
    case Type::DCT32X16: {
608
1.74M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
1.74M
                                  scratch_space);
610
1.74M
      break;
611
0
    }
612
1.72M
    case Type::DCT16X32: {
613
1.72M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
1.72M
                                  scratch_space);
615
1.72M
      break;
616
0
    }
617
881k
    case Type::DCT32X32: {
618
881k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
881k
                                  scratch_space);
620
881k
      break;
621
0
    }
622
10.8M
    case Type::DCT: {
623
10.8M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
10.8M
                                scratch_space);
625
10.8M
      break;
626
0
    }
627
10.8M
    case Type::AFV0: {
628
10.8M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
10.8M
      break;
630
0
    }
631
10.8M
    case Type::AFV1: {
632
10.8M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
10.8M
      break;
634
0
    }
635
10.8M
    case Type::AFV2: {
636
10.8M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
10.8M
      break;
638
0
    }
639
10.8M
    case Type::AFV3: {
640
10.8M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
10.8M
      break;
642
0
    }
643
525k
    case Type::DCT64X32: {
644
525k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
525k
                                  scratch_space);
646
525k
      break;
647
0
    }
648
321k
    case Type::DCT32X64: {
649
321k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
321k
                                  scratch_space);
651
321k
      break;
652
0
    }
653
148k
    case Type::DCT64X64: {
654
148k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
148k
                                  scratch_space);
656
148k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
135M
  }
689
135M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
16.5M
                                        float* scratch_space) {
461
16.5M
  using Type = AcStrategyType;
462
16.5M
  switch (strategy) {
463
2.78M
    case Type::IDENTITY: {
464
2.78M
      float dcs[4] = {};
465
2.78M
      float block00 = coefficients[0];
466
2.78M
      float block01 = coefficients[1];
467
2.78M
      float block10 = coefficients[8];
468
2.78M
      float block11 = coefficients[9];
469
2.78M
      dcs[0] = block00 + block01 + block10 + block11;
470
2.78M
      dcs[1] = block00 + block01 - block10 - block11;
471
2.78M
      dcs[2] = block00 - block01 + block10 - block11;
472
2.78M
      dcs[3] = block00 - block01 - block10 + block11;
473
8.36M
      for (size_t y = 0; y < 2; y++) {
474
16.7M
        for (size_t x = 0; x < 2; x++) {
475
11.1M
          float block_dc = dcs[y * 2 + x];
476
11.1M
          float residual_sum = 0;
477
55.7M
          for (size_t iy = 0; iy < 4; iy++) {
478
222M
            for (size_t ix = 0; ix < 4; ix++) {
479
178M
              if (ix == 0 && iy == 0) continue;
480
167M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
167M
            }
482
44.5M
          }
483
11.1M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
11.1M
              block_dc - residual_sum * (1.0f / 16);
485
55.7M
          for (size_t iy = 0; iy < 4; iy++) {
486
222M
            for (size_t ix = 0; ix < 4; ix++) {
487
178M
              if (ix == 1 && iy == 1) continue;
488
167M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
167M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
167M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
167M
            }
492
44.5M
          }
493
11.1M
          pixels[y * 4 * pixels_stride + x * 4] =
494
11.1M
              coefficients[(y + 2) * 8 + x + 2] +
495
11.1M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
11.1M
        }
497
5.57M
      }
498
2.78M
      break;
499
0
    }
500
396k
    case Type::DCT8X4: {
501
396k
      float dcs[2] = {};
502
396k
      float block0 = coefficients[0];
503
396k
      float block1 = coefficients[8];
504
396k
      dcs[0] = block0 + block1;
505
396k
      dcs[1] = block0 - block1;
506
1.18M
      for (size_t x = 0; x < 2; x++) {
507
792k
        HWY_ALIGN float block[4 * 8];
508
792k
        block[0] = dcs[x];
509
3.96M
        for (size_t iy = 0; iy < 4; iy++) {
510
28.5M
          for (size_t ix = 0; ix < 8; ix++) {
511
25.3M
            if (ix == 0 && iy == 0) continue;
512
24.5M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
24.5M
          }
514
3.16M
        }
515
792k
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
792k
                                  scratch_space);
517
792k
      }
518
396k
      break;
519
0
    }
520
171k
    case Type::DCT4X8: {
521
171k
      float dcs[2] = {};
522
171k
      float block0 = coefficients[0];
523
171k
      float block1 = coefficients[8];
524
171k
      dcs[0] = block0 + block1;
525
171k
      dcs[1] = block0 - block1;
526
515k
      for (size_t y = 0; y < 2; y++) {
527
343k
        HWY_ALIGN float block[4 * 8];
528
343k
        block[0] = dcs[y];
529
1.71M
        for (size_t iy = 0; iy < 4; iy++) {
530
12.3M
          for (size_t ix = 0; ix < 8; ix++) {
531
11.0M
            if (ix == 0 && iy == 0) continue;
532
10.6M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
10.6M
          }
534
1.37M
        }
535
343k
        ComputeScaledIDCT<4, 8>()(
536
343k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
343k
            scratch_space);
538
343k
      }
539
171k
      break;
540
0
    }
541
1.72k
    case Type::DCT4X4: {
542
1.72k
      float dcs[4] = {};
543
1.72k
      float block00 = coefficients[0];
544
1.72k
      float block01 = coefficients[1];
545
1.72k
      float block10 = coefficients[8];
546
1.72k
      float block11 = coefficients[9];
547
1.72k
      dcs[0] = block00 + block01 + block10 + block11;
548
1.72k
      dcs[1] = block00 + block01 - block10 - block11;
549
1.72k
      dcs[2] = block00 - block01 + block10 - block11;
550
1.72k
      dcs[3] = block00 - block01 - block10 + block11;
551
5.18k
      for (size_t y = 0; y < 2; y++) {
552
10.3k
        for (size_t x = 0; x < 2; x++) {
553
6.91k
          HWY_ALIGN float block[4 * 4];
554
6.91k
          block[0] = dcs[y * 2 + x];
555
34.5k
          for (size_t iy = 0; iy < 4; iy++) {
556
138k
            for (size_t ix = 0; ix < 4; ix++) {
557
110k
              if (ix == 0 && iy == 0) continue;
558
103k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
103k
            }
560
27.6k
          }
561
6.91k
          ComputeScaledIDCT<4, 4>()(
562
6.91k
              block,
563
6.91k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
6.91k
              scratch_space);
565
6.91k
        }
566
3.45k
      }
567
1.72k
      break;
568
0
    }
569
5.76M
    case Type::DCT2X2: {
570
5.76M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
5.76M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
5.76M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
5.76M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
5.76M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
51.8M
      for (size_t y = 0; y < kBlockDim; y++) {
576
414M
        for (size_t x = 0; x < kBlockDim; x++) {
577
368M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
368M
        }
579
46.0M
      }
580
5.76M
      break;
581
0
    }
582
424k
    case Type::DCT16X16: {
583
424k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
424k
                                  scratch_space);
585
424k
      break;
586
0
    }
587
613k
    case Type::DCT16X8: {
588
613k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
613k
                                 scratch_space);
590
613k
      break;
591
0
    }
592
665k
    case Type::DCT8X16: {
593
665k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
665k
                                 scratch_space);
595
665k
      break;
596
0
    }
597
714
    case Type::DCT32X8: {
598
714
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
714
                                 scratch_space);
600
714
      break;
601
0
    }
602
108
    case Type::DCT8X32: {
603
108
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
108
                                 scratch_space);
605
108
      break;
606
0
    }
607
137k
    case Type::DCT32X16: {
608
137k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
137k
                                  scratch_space);
610
137k
      break;
611
0
    }
612
134k
    case Type::DCT16X32: {
613
134k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
134k
                                  scratch_space);
615
134k
      break;
616
0
    }
617
209k
    case Type::DCT32X32: {
618
209k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
209k
                                  scratch_space);
620
209k
      break;
621
0
    }
622
4.23M
    case Type::DCT: {
623
4.23M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
4.23M
                                scratch_space);
625
4.23M
      break;
626
0
    }
627
281k
    case Type::AFV0: {
628
281k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
281k
      break;
630
0
    }
631
158k
    case Type::AFV1: {
632
158k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
158k
      break;
634
0
    }
635
197k
    case Type::AFV2: {
636
197k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
197k
      break;
638
0
    }
639
190k
    case Type::AFV3: {
640
190k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
190k
      break;
642
0
    }
643
41.7k
    case Type::DCT64X32: {
644
41.7k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
41.7k
                                  scratch_space);
646
41.7k
      break;
647
0
    }
648
17.0k
    case Type::DCT32X64: {
649
17.0k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
17.0k
                                  scratch_space);
651
17.0k
      break;
652
0
    }
653
120k
    case Type::DCT64X64: {
654
120k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
120k
                                  scratch_space);
656
120k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
16.5M
  }
689
16.5M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
// Fills `llf` from the DC values in `dc`, dispatching on `strategy`.
//
// For the multi-block DCT strategies (DCT16X8 up to DCT256X256) this forwards
// to ReinterpretingDCT with ROWS x COLS chosen per strategy (e.g. DCT32X16
// uses ROWS=4, COLS=2) and DCT_ROWS/DCT_COLS equal to ROWS/COLS times
// kBlockDim. In every such case the output stride passed for `llf` is
// max(ROWS, COLS) * kBlockDim. For all remaining strategies only a single
// value is written: llf[0] = dc[0].
//
// Parameters:
//   strategy   - selects which case below runs.
//   dc         - input values; forwarded as `input` to ReinterpretingDCT.
//   dc_stride  - forwarded as `input_stride` to ReinterpretingDCT.
//   llf        - output; written by ReinterpretingDCT or directly (llf[0]).
//   scratch    - caller-provided work memory, used only by DCT64X32 and
//                larger; smaller strategies use the local warm_* arrays.
//                NOTE(review): the required size/alignment of `scratch` is
//                not visible in this function - confirm against callers.
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
16.7M
                                              float* JXL_RESTRICT scratch) {
695
16.7M
  using Type = AcStrategyType;
// Local work buffers passed as `block` / `scratch_space` to ReinterpretingDCT
// by the cases DCT16X8 through DCT32X32 (ROWS and COLS are at most 4 there).
696
16.7M
  HWY_ALIGN float warm_block[4 * 4];
697
16.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
16.7M
  switch (strategy) {
699
613k
    case Type::DCT16X8: {
700
613k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
613k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
613k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
613k
      break;
704
0
    }
705
665k
    case Type::DCT8X16: {
706
665k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
665k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
665k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
665k
      break;
710
0
    }
711
424k
    case Type::DCT16X16: {
712
424k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
424k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
424k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
424k
      break;
716
0
    }
717
714
    case Type::DCT32X8: {
718
714
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
714
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
714
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
714
      break;
722
0
    }
723
108
    case Type::DCT8X32: {
724
108
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
108
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
108
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
108
      break;
728
0
    }
729
137k
    case Type::DCT32X16: {
730
137k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
137k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
137k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
137k
      break;
734
0
    }
735
134k
    case Type::DCT16X32: {
736
134k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
134k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
134k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
134k
      break;
740
0
    }
741
209k
    case Type::DCT32X32: {
742
209k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
209k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
209k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
209k
      break;
746
0
    }
// From DCT64X32 onwards the work memory comes from the caller's `scratch`:
// `scratch` itself is passed as `block` and `scratch + ROWS * COLS` as
// `scratch_space`, i.e. the first ROWS * COLS floats are set aside for `block`.
747
41.7k
    case Type::DCT64X32: {
748
41.7k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
41.7k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
41.7k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
41.7k
      break;
752
0
    }
753
17.0k
    case Type::DCT32X64: {
754
17.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
17.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
17.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
17.0k
      break;
758
0
    }
759
120k
    case Type::DCT64X64: {
760
120k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
120k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
120k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
120k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
// All remaining strategies fall through to one shared body that copies the
// single value dc[0] into llf[0]; no transform and no work memory is used.
805
4.26M
    case Type::DCT:
806
10.0M
    case Type::DCT2X2:
807
10.0M
    case Type::DCT4X4:
808
10.2M
    case Type::DCT4X8:
809
10.5M
    case Type::DCT8X4:
810
10.8M
    case Type::AFV0:
811
11.0M
    case Type::AFV1:
812
11.2M
    case Type::AFV2:
813
11.4M
    case Type::AFV3:
814
14.3M
    case Type::IDENTITY:
815
14.3M
      llf[0] = dc[0];
816
14.3M
      break;
// NOTE(review): the ';' after the switch's closing brace is an empty
// statement; the switch has no `default` label.
817
16.7M
  };
818
16.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
16.7M
                                              float* JXL_RESTRICT scratch) {
695
16.7M
  using Type = AcStrategyType;
696
16.7M
  HWY_ALIGN float warm_block[4 * 4];
697
16.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
16.7M
  switch (strategy) {
699
613k
    case Type::DCT16X8: {
700
613k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
613k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
613k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
613k
      break;
704
0
    }
705
665k
    case Type::DCT8X16: {
706
665k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
665k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
665k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
665k
      break;
710
0
    }
711
424k
    case Type::DCT16X16: {
712
424k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
424k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
424k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
424k
      break;
716
0
    }
717
714
    case Type::DCT32X8: {
718
714
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
714
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
714
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
714
      break;
722
0
    }
723
108
    case Type::DCT8X32: {
724
108
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
108
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
108
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
108
      break;
728
0
    }
729
137k
    case Type::DCT32X16: {
730
137k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
137k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
137k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
137k
      break;
734
0
    }
735
134k
    case Type::DCT16X32: {
736
134k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
134k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
134k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
134k
      break;
740
0
    }
741
209k
    case Type::DCT32X32: {
742
209k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
209k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
209k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
209k
      break;
746
0
    }
747
41.7k
    case Type::DCT64X32: {
748
41.7k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
41.7k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
41.7k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
41.7k
      break;
752
0
    }
753
17.0k
    case Type::DCT32X64: {
754
17.0k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
17.0k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
17.0k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
17.0k
      break;
758
0
    }
759
120k
    case Type::DCT64X64: {
760
120k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
120k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
120k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
120k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
4.26M
    case Type::DCT:
806
10.0M
    case Type::DCT2X2:
807
10.0M
    case Type::DCT4X4:
808
10.2M
    case Type::DCT4X8:
809
10.5M
    case Type::DCT8X4:
810
10.8M
    case Type::AFV0:
811
11.0M
    case Type::AFV1:
812
11.2M
    case Type::AFV2:
813
11.4M
    case Type::AFV3:
814
14.3M
    case Type::IDENTITY:
815
14.3M
      llf[0] = dc[0];
816
14.3M
      break;
817
16.7M
  };
818
16.7M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_