Coverage Report

Created: 2025-11-14 07:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/dec_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include <cstring>
7
8
#include "lib/jxl/base/compiler_specific.h"
9
#include "lib/jxl/frame_dimensions.h"
10
11
#if defined(LIB_JXL_DEC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
12
#ifdef LIB_JXL_DEC_TRANSFORMS_INL_H_
13
#undef LIB_JXL_DEC_TRANSFORMS_INL_H_
14
#else
15
#define LIB_JXL_DEC_TRANSFORMS_INL_H_
16
#endif
17
18
#include <cstddef>
19
#include <hwy/highway.h>
20
21
#include "lib/jxl/ac_strategy.h"
22
#include "lib/jxl/dct-inl.h"
23
#include "lib/jxl/dct_scales.h"
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
namespace HWY_NAMESPACE {
27
namespace {
28
29
// These templates are not found via ADL.
30
using hwy::HWY_NAMESPACE::MulAdd;
31
32
// Computes the lowest-frequency LF_ROWSxLF_COLS-sized square in output, which
33
// is a DCT_ROWS*DCT_COLS-sized DCT block, by doing a ROWS*COLS DCT on the
34
// input block.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
JXL_INLINE void ReinterpretingDCT(const float* input, const size_t input_stride,
38
                                  float* output, const size_t output_stride,
39
                                  float* JXL_RESTRICT block,
40
3.03M
                                  float* JXL_RESTRICT scratch_space) {
41
3.03M
  static_assert(LF_ROWS == ROWS,
42
3.03M
                "ReinterpretingDCT should only be called with LF == N");
43
3.03M
  static_assert(LF_COLS == COLS,
44
3.03M
                "ReinterpretingDCT should only be called with LF == N");
45
3.03M
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3.03M
                                 scratch_space);
47
3.03M
  if (ROWS < COLS) {
48
2.29M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
4.94M
      for (size_t x = 0; x < LF_COLS; x++) {
50
3.68M
        output[y * output_stride + x] =
51
3.68M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
3.68M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
3.68M
      }
54
1.26M
    }
55
2.00M
  } else {
56
6.92M
    for (size_t y = 0; y < LF_COLS; y++) {
57
26.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
21.9M
        output[y * output_stride + x] =
59
21.9M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
21.9M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
21.9M
      }
62
4.91M
    }
63
2.00M
  }
64
3.03M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
775k
                                  float* JXL_RESTRICT scratch_space) {
41
775k
  static_assert(LF_ROWS == ROWS,
42
775k
                "ReinterpretingDCT should only be called with LF == N");
43
775k
  static_assert(LF_COLS == COLS,
44
775k
                "ReinterpretingDCT should only be called with LF == N");
45
775k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
775k
                                 scratch_space);
47
775k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
775k
  } else {
56
1.55M
    for (size_t y = 0; y < LF_COLS; y++) {
57
2.32M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.55M
        output[y * output_stride + x] =
59
1.55M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.55M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.55M
      }
62
775k
    }
63
775k
  }
64
775k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
838k
                                  float* JXL_RESTRICT scratch_space) {
41
838k
  static_assert(LF_ROWS == ROWS,
42
838k
                "ReinterpretingDCT should only be called with LF == N");
43
838k
  static_assert(LF_COLS == COLS,
44
838k
                "ReinterpretingDCT should only be called with LF == N");
45
838k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
838k
                                 scratch_space);
47
838k
  if (ROWS < COLS) {
48
1.67M
    for (size_t y = 0; y < LF_ROWS; y++) {
49
2.51M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.67M
        output[y * output_stride + x] =
51
1.67M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.67M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.67M
      }
54
838k
    }
55
838k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
838k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
547k
                                  float* JXL_RESTRICT scratch_space) {
41
547k
  static_assert(LF_ROWS == ROWS,
42
547k
                "ReinterpretingDCT should only be called with LF == N");
43
547k
  static_assert(LF_COLS == COLS,
44
547k
                "ReinterpretingDCT should only be called with LF == N");
45
547k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
547k
                                 scratch_space);
47
547k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
547k
  } else {
56
1.64M
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.28M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.18M
        output[y * output_stride + x] =
59
2.18M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.18M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.18M
      }
62
1.09M
    }
63
547k
  }
64
547k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
708
                                  float* JXL_RESTRICT scratch_space) {
41
708
  static_assert(LF_ROWS == ROWS,
42
708
                "ReinterpretingDCT should only be called with LF == N");
43
708
  static_assert(LF_COLS == COLS,
44
708
                "ReinterpretingDCT should only be called with LF == N");
45
708
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
708
                                 scratch_space);
47
708
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
708
  } else {
56
1.41k
    for (size_t y = 0; y < LF_COLS; y++) {
57
3.54k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
2.83k
        output[y * output_stride + x] =
59
2.83k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
2.83k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
2.83k
      }
62
708
    }
63
708
  }
64
708
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
108
                                  float* JXL_RESTRICT scratch_space) {
41
108
  static_assert(LF_ROWS == ROWS,
42
108
                "ReinterpretingDCT should only be called with LF == N");
43
108
  static_assert(LF_COLS == COLS,
44
108
                "ReinterpretingDCT should only be called with LF == N");
45
108
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
108
                                 scratch_space);
47
108
  if (ROWS < COLS) {
48
216
    for (size_t y = 0; y < LF_ROWS; y++) {
49
540
      for (size_t x = 0; x < LF_COLS; x++) {
50
432
        output[y * output_stride + x] =
51
432
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
432
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
432
      }
54
108
    }
55
108
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
108
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
173k
                                  float* JXL_RESTRICT scratch_space) {
41
173k
  static_assert(LF_ROWS == ROWS,
42
173k
                "ReinterpretingDCT should only be called with LF == N");
43
173k
  static_assert(LF_COLS == COLS,
44
173k
                "ReinterpretingDCT should only be called with LF == N");
45
173k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
173k
                                 scratch_space);
47
173k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
173k
  } else {
56
520k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.73M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.38M
        output[y * output_stride + x] =
59
1.38M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.38M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.38M
      }
62
346k
    }
63
173k
  }
64
173k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
173k
                                  float* JXL_RESTRICT scratch_space) {
41
173k
  static_assert(LF_ROWS == ROWS,
42
173k
                "ReinterpretingDCT should only be called with LF == N");
43
173k
  static_assert(LF_COLS == COLS,
44
173k
                "ReinterpretingDCT should only be called with LF == N");
45
173k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
173k
                                 scratch_space);
47
173k
  if (ROWS < COLS) {
48
519k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.73M
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.38M
        output[y * output_stride + x] =
51
1.38M
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.38M
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.38M
      }
54
346k
    }
55
173k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
173k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
298k
                                  float* JXL_RESTRICT scratch_space) {
41
298k
  static_assert(LF_ROWS == ROWS,
42
298k
                "ReinterpretingDCT should only be called with LF == N");
43
298k
  static_assert(LF_COLS == COLS,
44
298k
                "ReinterpretingDCT should only be called with LF == N");
45
298k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
298k
                                 scratch_space);
47
298k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
298k
  } else {
56
1.49M
    for (size_t y = 0; y < LF_COLS; y++) {
57
5.97M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
4.78M
        output[y * output_stride + x] =
59
4.78M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
4.78M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
4.78M
      }
62
1.19M
    }
63
298k
  }
64
298k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
48.6k
                                  float* JXL_RESTRICT scratch_space) {
41
48.6k
  static_assert(LF_ROWS == ROWS,
42
48.6k
                "ReinterpretingDCT should only be called with LF == N");
43
48.6k
  static_assert(LF_COLS == COLS,
44
48.6k
                "ReinterpretingDCT should only be called with LF == N");
45
48.6k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
48.6k
                                 scratch_space);
47
48.6k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
48.6k
  } else {
56
243k
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.75M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.55M
        output[y * output_stride + x] =
59
1.55M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.55M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.55M
      }
62
194k
    }
63
48.6k
  }
64
48.6k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
19.2k
                                  float* JXL_RESTRICT scratch_space) {
41
19.2k
  static_assert(LF_ROWS == ROWS,
42
19.2k
                "ReinterpretingDCT should only be called with LF == N");
43
19.2k
  static_assert(LF_COLS == COLS,
44
19.2k
                "ReinterpretingDCT should only be called with LF == N");
45
19.2k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
19.2k
                                 scratch_space);
47
19.2k
  if (ROWS < COLS) {
48
96.2k
    for (size_t y = 0; y < LF_ROWS; y++) {
49
693k
      for (size_t x = 0; x < LF_COLS; x++) {
50
616k
        output[y * output_stride + x] =
51
616k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
616k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
616k
      }
54
77.0k
    }
55
19.2k
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
19.2k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
163k
                                  float* JXL_RESTRICT scratch_space) {
41
163k
  static_assert(LF_ROWS == ROWS,
42
163k
                "ReinterpretingDCT should only be called with LF == N");
43
163k
  static_assert(LF_COLS == COLS,
44
163k
                "ReinterpretingDCT should only be called with LF == N");
45
163k
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
163k
                                 scratch_space);
47
163k
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
163k
  } else {
56
1.47M
    for (size_t y = 0; y < LF_COLS; y++) {
57
11.7M
      for (size_t x = 0; x < LF_ROWS; x++) {
58
10.4M
        output[y * output_stride + x] =
59
10.4M
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
10.4M
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
10.4M
      }
62
1.31M
    }
63
163k
  }
64
163k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
3
                                  float* JXL_RESTRICT scratch_space) {
41
3
  static_assert(LF_ROWS == ROWS,
42
3
                "ReinterpretingDCT should only be called with LF == N");
43
3
  static_assert(LF_COLS == COLS,
44
3
                "ReinterpretingDCT should only be called with LF == N");
45
3
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
3
                                 scratch_space);
47
3
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
3
  } else {
56
27
    for (size_t y = 0; y < LF_COLS; y++) {
57
408
      for (size_t x = 0; x < LF_ROWS; x++) {
58
384
        output[y * output_stride + x] =
59
384
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
384
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
384
      }
62
24
    }
63
3
  }
64
3
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
9
                                  float* JXL_RESTRICT scratch_space) {
41
9
  static_assert(LF_ROWS == ROWS,
42
9
                "ReinterpretingDCT should only be called with LF == N");
43
9
  static_assert(LF_COLS == COLS,
44
9
                "ReinterpretingDCT should only be called with LF == N");
45
9
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
9
                                 scratch_space);
47
9
  if (ROWS < COLS) {
48
81
    for (size_t y = 0; y < LF_ROWS; y++) {
49
1.22k
      for (size_t x = 0; x < LF_COLS; x++) {
50
1.15k
        output[y * output_stride + x] =
51
1.15k
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
1.15k
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
1.15k
      }
54
72
    }
55
9
  } else {
56
0
    for (size_t y = 0; y < LF_COLS; y++) {
57
0
      for (size_t x = 0; x < LF_ROWS; x++) {
58
0
        output[y * output_stride + x] =
59
0
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
0
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
0
      }
62
0
    }
63
0
  }
64
9
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Line
Count
Source
40
6
                                  float* JXL_RESTRICT scratch_space) {
41
6
  static_assert(LF_ROWS == ROWS,
42
6
                "ReinterpretingDCT should only be called with LF == N");
43
6
  static_assert(LF_COLS == COLS,
44
6
                "ReinterpretingDCT should only be called with LF == N");
45
6
  ComputeScaledDCT<ROWS, COLS>()(DCTFrom(input, input_stride), block,
46
6
                                 scratch_space);
47
6
  if (ROWS < COLS) {
48
0
    for (size_t y = 0; y < LF_ROWS; y++) {
49
0
      for (size_t x = 0; x < LF_COLS; x++) {
50
0
        output[y * output_stride + x] =
51
0
            block[y * COLS + x] * DCTTotalResampleScale<ROWS, DCT_ROWS>(y) *
52
0
            DCTTotalResampleScale<COLS, DCT_COLS>(x);
53
0
      }
54
0
    }
55
6
  } else {
56
102
    for (size_t y = 0; y < LF_COLS; y++) {
57
1.63k
      for (size_t x = 0; x < LF_ROWS; x++) {
58
1.53k
        output[y * output_stride + x] =
59
1.53k
            block[y * ROWS + x] * DCTTotalResampleScale<COLS, DCT_COLS>(y) *
60
1.53k
            DCTTotalResampleScale<ROWS, DCT_ROWS>(x);
61
1.53k
      }
62
96
    }
63
6
  }
64
6
}
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*, float*)
65
66
template <size_t S>
67
62.1M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
62.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
62.1M
  static_assert(S % 2 == 0, "S should be even");
70
62.1M
  float temp[kDCTBlockSize];
71
62.1M
  constexpr size_t num_2x2 = S / 2;
72
207M
  for (size_t y = 0; y < num_2x2; y++) {
73
579M
    for (size_t x = 0; x < num_2x2; x++) {
74
434M
      float c00 = block[y * kBlockDim + x];
75
434M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
434M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
434M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
434M
      float r00 = c00 + c01 + c10 + c11;
79
434M
      float r01 = c00 + c01 - c10 - c11;
80
434M
      float r10 = c00 - c01 + c10 - c11;
81
434M
      float r11 = c00 - c01 - c10 + c11;
82
434M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
434M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
434M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
434M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
434M
    }
87
144M
  }
88
351M
  for (size_t y = 0; y < S; y++) {
89
2.02G
    for (size_t x = 0; x < S; x++) {
90
1.73G
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
1.73G
    }
92
289M
  }
93
62.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
27.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
27.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.9M
      float c00 = block[y * kBlockDim + x];
75
13.9M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
13.9M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
13.9M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
13.9M
      float r00 = c00 + c01 + c10 + c11;
79
13.9M
      float r01 = c00 + c01 - c10 - c11;
80
13.9M
      float r10 = c00 - c01 + c10 - c11;
81
13.9M
      float r11 = c00 - c01 - c10 + c11;
82
13.9M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
13.9M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
13.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
13.9M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
13.9M
    }
87
13.9M
  }
88
41.8M
  for (size_t y = 0; y < S; y++) {
89
83.6M
    for (size_t x = 0; x < S; x++) {
90
55.7M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
55.7M
    }
92
27.8M
  }
93
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
41.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
83.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
55.7M
      float c00 = block[y * kBlockDim + x];
75
55.7M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
55.7M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
55.7M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
55.7M
      float r00 = c00 + c01 + c10 + c11;
79
55.7M
      float r01 = c00 + c01 - c10 - c11;
80
55.7M
      float r10 = c00 - c01 + c10 - c11;
81
55.7M
      float r11 = c00 - c01 - c10 + c11;
82
55.7M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
55.7M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
55.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
55.7M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
55.7M
    }
87
27.8M
  }
88
69.7M
  for (size_t y = 0; y < S; y++) {
89
278M
    for (size_t x = 0; x < S; x++) {
90
223M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
223M
    }
92
55.7M
  }
93
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
69.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
278M
    for (size_t x = 0; x < num_2x2; x++) {
74
223M
      float c00 = block[y * kBlockDim + x];
75
223M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
223M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
223M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
223M
      float r00 = c00 + c01 + c10 + c11;
79
223M
      float r01 = c00 + c01 - c10 - c11;
80
223M
      float r10 = c00 - c01 + c10 - c11;
81
223M
      float r11 = c00 - c01 - c10 + c11;
82
223M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
223M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
223M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
223M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
223M
    }
87
55.7M
  }
88
125M
  for (size_t y = 0; y < S; y++) {
89
1.00G
    for (size_t x = 0; x < S; x++) {
90
892M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
892M
    }
92
111M
  }
93
13.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
13.5M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
6.75M
      float c00 = block[y * kBlockDim + x];
75
6.75M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
6.75M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
6.75M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
6.75M
      float r00 = c00 + c01 + c10 + c11;
79
6.75M
      float r01 = c00 + c01 - c10 - c11;
80
6.75M
      float r10 = c00 - c01 + c10 - c11;
81
6.75M
      float r11 = c00 - c01 - c10 + c11;
82
6.75M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
6.75M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
6.75M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
6.75M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
6.75M
    }
87
6.75M
  }
88
20.2M
  for (size_t y = 0; y < S; y++) {
89
40.5M
    for (size_t x = 0; x < S; x++) {
90
27.0M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
27.0M
    }
92
13.5M
  }
93
6.75M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
20.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
40.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
27.0M
      float c00 = block[y * kBlockDim + x];
75
27.0M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
27.0M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
27.0M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
27.0M
      float r00 = c00 + c01 + c10 + c11;
79
27.0M
      float r01 = c00 + c01 - c10 - c11;
80
27.0M
      float r10 = c00 - c01 + c10 - c11;
81
27.0M
      float r11 = c00 - c01 - c10 + c11;
82
27.0M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
27.0M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
27.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
27.0M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
27.0M
    }
87
13.5M
  }
88
33.7M
  for (size_t y = 0; y < S; y++) {
89
135M
    for (size_t x = 0; x < S; x++) {
90
108M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
108M
    }
92
27.0M
  }
93
6.75M
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
6.75M
void IDCT2TopBlock(const float* block, size_t stride_out, float* out) {
68
6.75M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
6.75M
  static_assert(S % 2 == 0, "S should be even");
70
6.75M
  float temp[kDCTBlockSize];
71
6.75M
  constexpr size_t num_2x2 = S / 2;
72
33.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
135M
    for (size_t x = 0; x < num_2x2; x++) {
74
108M
      float c00 = block[y * kBlockDim + x];
75
108M
      float c01 = block[y * kBlockDim + num_2x2 + x];
76
108M
      float c10 = block[(y + num_2x2) * kBlockDim + x];
77
108M
      float c11 = block[(y + num_2x2) * kBlockDim + num_2x2 + x];
78
108M
      float r00 = c00 + c01 + c10 + c11;
79
108M
      float r01 = c00 + c01 - c10 - c11;
80
108M
      float r10 = c00 - c01 + c10 - c11;
81
108M
      float r11 = c00 - c01 - c10 + c11;
82
108M
      temp[y * 2 * kBlockDim + x * 2] = r00;
83
108M
      temp[y * 2 * kBlockDim + x * 2 + 1] = r01;
84
108M
      temp[(y * 2 + 1) * kBlockDim + x * 2] = r10;
85
108M
      temp[(y * 2 + 1) * kBlockDim + x * 2 + 1] = r11;
86
108M
    }
87
27.0M
  }
88
60.8M
  for (size_t y = 0; y < S; y++) {
89
486M
    for (size_t x = 0; x < S; x++) {
90
432M
      out[y * stride_out + x] = temp[y * kBlockDim + x];
91
432M
    }
92
54.0M
  }
93
6.75M
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::IDCT2TopBlock<8ul>(float const*, unsigned long, float*)
94
95
56.8M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
56.8M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
56.8M
      {
98
56.8M
          0.25,
99
56.8M
          0.25,
100
56.8M
          0.25,
101
56.8M
          0.25,
102
56.8M
          0.25,
103
56.8M
          0.25,
104
56.8M
          0.25,
105
56.8M
          0.25,
106
56.8M
          0.25,
107
56.8M
          0.25,
108
56.8M
          0.25,
109
56.8M
          0.25,
110
56.8M
          0.25,
111
56.8M
          0.25,
112
56.8M
          0.25,
113
56.8M
          0.25,
114
56.8M
      },
115
56.8M
      {
116
56.8M
          0.876902929799142f,
117
56.8M
          0.2206518106944235f,
118
56.8M
          -0.10140050393753763f,
119
56.8M
          -0.1014005039375375f,
120
56.8M
          0.2206518106944236f,
121
56.8M
          -0.10140050393753777f,
122
56.8M
          -0.10140050393753772f,
123
56.8M
          -0.10140050393753763f,
124
56.8M
          -0.10140050393753758f,
125
56.8M
          -0.10140050393753769f,
126
56.8M
          -0.1014005039375375f,
127
56.8M
          -0.10140050393753768f,
128
56.8M
          -0.10140050393753768f,
129
56.8M
          -0.10140050393753759f,
130
56.8M
          -0.10140050393753763f,
131
56.8M
          -0.10140050393753741f,
132
56.8M
      },
133
56.8M
      {
134
56.8M
          0.0,
135
56.8M
          0.0,
136
56.8M
          0.40670075830260755f,
137
56.8M
          0.44444816619734445f,
138
56.8M
          0.0,
139
56.8M
          0.0,
140
56.8M
          0.19574399372042936f,
141
56.8M
          0.2929100136981264f,
142
56.8M
          -0.40670075830260716f,
143
56.8M
          -0.19574399372042872f,
144
56.8M
          0.0,
145
56.8M
          0.11379074460448091f,
146
56.8M
          -0.44444816619734384f,
147
56.8M
          -0.29291001369812636f,
148
56.8M
          -0.1137907446044814f,
149
56.8M
          0.0,
150
56.8M
      },
151
56.8M
      {
152
56.8M
          0.0,
153
56.8M
          0.0,
154
56.8M
          -0.21255748058288748f,
155
56.8M
          0.3085497062849767f,
156
56.8M
          0.0,
157
56.8M
          0.4706702258572536f,
158
56.8M
          -0.1621205195722993f,
159
56.8M
          0.0,
160
56.8M
          -0.21255748058287047f,
161
56.8M
          -0.16212051957228327f,
162
56.8M
          -0.47067022585725277f,
163
56.8M
          -0.1464291867126764f,
164
56.8M
          0.3085497062849487f,
165
56.8M
          0.0,
166
56.8M
          -0.14642918671266536f,
167
56.8M
          0.4251149611657548f,
168
56.8M
      },
169
56.8M
      {
170
56.8M
          0.0,
171
56.8M
          -0.7071067811865474f,
172
56.8M
          0.0,
173
56.8M
          0.0,
174
56.8M
          0.7071067811865476f,
175
56.8M
          0.0,
176
56.8M
          0.0,
177
56.8M
          0.0,
178
56.8M
          0.0,
179
56.8M
          0.0,
180
56.8M
          0.0,
181
56.8M
          0.0,
182
56.8M
          0.0,
183
56.8M
          0.0,
184
56.8M
          0.0,
185
56.8M
          0.0,
186
56.8M
      },
187
56.8M
      {
188
56.8M
          -0.4105377591765233f,
189
56.8M
          0.6235485373547691f,
190
56.8M
          -0.06435071657946274f,
191
56.8M
          -0.06435071657946266f,
192
56.8M
          0.6235485373547694f,
193
56.8M
          -0.06435071657946284f,
194
56.8M
          -0.0643507165794628f,
195
56.8M
          -0.06435071657946274f,
196
56.8M
          -0.06435071657946272f,
197
56.8M
          -0.06435071657946279f,
198
56.8M
          -0.06435071657946266f,
199
56.8M
          -0.06435071657946277f,
200
56.8M
          -0.06435071657946277f,
201
56.8M
          -0.06435071657946273f,
202
56.8M
          -0.06435071657946274f,
203
56.8M
          -0.0643507165794626f,
204
56.8M
      },
205
56.8M
      {
206
56.8M
          0.0,
207
56.8M
          0.0,
208
56.8M
          -0.4517556589999482f,
209
56.8M
          0.15854503551840063f,
210
56.8M
          0.0,
211
56.8M
          -0.04038515160822202f,
212
56.8M
          0.0074182263792423875f,
213
56.8M
          0.39351034269210167f,
214
56.8M
          -0.45175565899994635f,
215
56.8M
          0.007418226379244351f,
216
56.8M
          0.1107416575309343f,
217
56.8M
          0.08298163094882051f,
218
56.8M
          0.15854503551839705f,
219
56.8M
          0.3935103426921022f,
220
56.8M
          0.0829816309488214f,
221
56.8M
          -0.45175565899994796f,
222
56.8M
      },
223
56.8M
      {
224
56.8M
          0.0,
225
56.8M
          0.0,
226
56.8M
          -0.304684750724869f,
227
56.8M
          0.5112616136591823f,
228
56.8M
          0.0,
229
56.8M
          0.0,
230
56.8M
          -0.290480129728998f,
231
56.8M
          -0.06578701549142804f,
232
56.8M
          0.304684750724884f,
233
56.8M
          0.2904801297290076f,
234
56.8M
          0.0,
235
56.8M
          -0.23889773523344604f,
236
56.8M
          -0.5112616136592012f,
237
56.8M
          0.06578701549142545f,
238
56.8M
          0.23889773523345467f,
239
56.8M
          0.0,
240
56.8M
      },
241
56.8M
      {
242
56.8M
          0.0,
243
56.8M
          0.0,
244
56.8M
          0.3017929516615495f,
245
56.8M
          0.25792362796341184f,
246
56.8M
          0.0,
247
56.8M
          0.16272340142866204f,
248
56.8M
          0.09520022653475037f,
249
56.8M
          0.0,
250
56.8M
          0.3017929516615503f,
251
56.8M
          0.09520022653475055f,
252
56.8M
          -0.16272340142866173f,
253
56.8M
          -0.35312385449816297f,
254
56.8M
          0.25792362796341295f,
255
56.8M
          0.0,
256
56.8M
          -0.3531238544981624f,
257
56.8M
          -0.6035859033230976f,
258
56.8M
      },
259
56.8M
      {
260
56.8M
          0.0,
261
56.8M
          0.0,
262
56.8M
          0.40824829046386274f,
263
56.8M
          0.0,
264
56.8M
          0.0,
265
56.8M
          0.0,
266
56.8M
          0.0,
267
56.8M
          -0.4082482904638628f,
268
56.8M
          -0.4082482904638635f,
269
56.8M
          0.0,
270
56.8M
          0.0,
271
56.8M
          -0.40824829046386296f,
272
56.8M
          0.0,
273
56.8M
          0.4082482904638634f,
274
56.8M
          0.408248290463863f,
275
56.8M
          0.0,
276
56.8M
      },
277
56.8M
      {
278
56.8M
          0.0,
279
56.8M
          0.0,
280
56.8M
          0.1747866975480809f,
281
56.8M
          0.0812611176717539f,
282
56.8M
          0.0,
283
56.8M
          0.0,
284
56.8M
          -0.3675398009862027f,
285
56.8M
          -0.307882213957909f,
286
56.8M
          -0.17478669754808135f,
287
56.8M
          0.3675398009862011f,
288
56.8M
          0.0,
289
56.8M
          0.4826689115059883f,
290
56.8M
          -0.08126111767175039f,
291
56.8M
          0.30788221395790305f,
292
56.8M
          -0.48266891150598584f,
293
56.8M
          0.0,
294
56.8M
      },
295
56.8M
      {
296
56.8M
          0.0,
297
56.8M
          0.0,
298
56.8M
          -0.21105601049335784f,
299
56.8M
          0.18567180916109802f,
300
56.8M
          0.0,
301
56.8M
          0.0,
302
56.8M
          0.49215859013738733f,
303
56.8M
          -0.38525013709251915f,
304
56.8M
          0.21105601049335806f,
305
56.8M
          -0.49215859013738905f,
306
56.8M
          0.0,
307
56.8M
          0.17419412659916217f,
308
56.8M
          -0.18567180916109904f,
309
56.8M
          0.3852501370925211f,
310
56.8M
          -0.1741941265991621f,
311
56.8M
          0.0,
312
56.8M
      },
313
56.8M
      {
314
56.8M
          0.0,
315
56.8M
          0.0,
316
56.8M
          -0.14266084808807264f,
317
56.8M
          -0.3416446842253372f,
318
56.8M
          0.0,
319
56.8M
          0.7367497537172237f,
320
56.8M
          0.24627107722075148f,
321
56.8M
          -0.08574019035519306f,
322
56.8M
          -0.14266084808807344f,
323
56.8M
          0.24627107722075137f,
324
56.8M
          0.14883399227113567f,
325
56.8M
          -0.04768680350229251f,
326
56.8M
          -0.3416446842253373f,
327
56.8M
          -0.08574019035519267f,
328
56.8M
          -0.047686803502292804f,
329
56.8M
          -0.14266084808807242f,
330
56.8M
      },
331
56.8M
      {
332
56.8M
          0.0,
333
56.8M
          0.0,
334
56.8M
          -0.13813540350758585f,
335
56.8M
          0.3302282550303788f,
336
56.8M
          0.0,
337
56.8M
          0.08755115000587084f,
338
56.8M
          -0.07946706605909573f,
339
56.8M
          -0.4613374887461511f,
340
56.8M
          -0.13813540350758294f,
341
56.8M
          -0.07946706605910261f,
342
56.8M
          0.49724647109535086f,
343
56.8M
          0.12538059448563663f,
344
56.8M
          0.3302282550303805f,
345
56.8M
          -0.4613374887461554f,
346
56.8M
          0.12538059448564315f,
347
56.8M
          -0.13813540350758452f,
348
56.8M
      },
349
56.8M
      {
350
56.8M
          0.0,
351
56.8M
          0.0,
352
56.8M
          -0.17437602599651067f,
353
56.8M
          0.0702790691196284f,
354
56.8M
          0.0,
355
56.8M
          -0.2921026642334881f,
356
56.8M
          0.3623817333531167f,
357
56.8M
          0.0,
358
56.8M
          -0.1743760259965108f,
359
56.8M
          0.36238173335311646f,
360
56.8M
          0.29210266423348785f,
361
56.8M
          -0.4326608024727445f,
362
56.8M
          0.07027906911962818f,
363
56.8M
          0.0,
364
56.8M
          -0.4326608024727457f,
365
56.8M
          0.34875205199302267f,
366
56.8M
      },
367
56.8M
      {
368
56.8M
          0.0,
369
56.8M
          0.0,
370
56.8M
          0.11354987314994337f,
371
56.8M
          -0.07417504595810355f,
372
56.8M
          0.0,
373
56.8M
          0.19402893032594343f,
374
56.8M
          -0.435190496523228f,
375
56.8M
          0.21918684838857466f,
376
56.8M
          0.11354987314994257f,
377
56.8M
          -0.4351904965232251f,
378
56.8M
          0.5550443808910661f,
379
56.8M
          -0.25468277124066463f,
380
56.8M
          -0.07417504595810233f,
381
56.8M
          0.2191868483885728f,
382
56.8M
          -0.25468277124066413f,
383
56.8M
          0.1135498731499429f,
384
56.8M
      },
385
56.8M
  };
386
387
56.8M
  const HWY_CAPPED(float, 16) d;
388
170M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
113M
    auto pixel = Zero(d);
390
1.93G
    for (size_t j = 0; j < 16; j++) {
391
1.81G
      auto cf = Set(d, coeffs[j]);
392
1.81G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.81G
      pixel = MulAdd(cf, basis, pixel);
394
1.81G
    }
395
113M
    Store(pixel, d, pixels + i);
396
113M
  }
397
56.8M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
55.7M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
55.7M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
55.7M
      {
98
55.7M
          0.25,
99
55.7M
          0.25,
100
55.7M
          0.25,
101
55.7M
          0.25,
102
55.7M
          0.25,
103
55.7M
          0.25,
104
55.7M
          0.25,
105
55.7M
          0.25,
106
55.7M
          0.25,
107
55.7M
          0.25,
108
55.7M
          0.25,
109
55.7M
          0.25,
110
55.7M
          0.25,
111
55.7M
          0.25,
112
55.7M
          0.25,
113
55.7M
          0.25,
114
55.7M
      },
115
55.7M
      {
116
55.7M
          0.876902929799142f,
117
55.7M
          0.2206518106944235f,
118
55.7M
          -0.10140050393753763f,
119
55.7M
          -0.1014005039375375f,
120
55.7M
          0.2206518106944236f,
121
55.7M
          -0.10140050393753777f,
122
55.7M
          -0.10140050393753772f,
123
55.7M
          -0.10140050393753763f,
124
55.7M
          -0.10140050393753758f,
125
55.7M
          -0.10140050393753769f,
126
55.7M
          -0.1014005039375375f,
127
55.7M
          -0.10140050393753768f,
128
55.7M
          -0.10140050393753768f,
129
55.7M
          -0.10140050393753759f,
130
55.7M
          -0.10140050393753763f,
131
55.7M
          -0.10140050393753741f,
132
55.7M
      },
133
55.7M
      {
134
55.7M
          0.0,
135
55.7M
          0.0,
136
55.7M
          0.40670075830260755f,
137
55.7M
          0.44444816619734445f,
138
55.7M
          0.0,
139
55.7M
          0.0,
140
55.7M
          0.19574399372042936f,
141
55.7M
          0.2929100136981264f,
142
55.7M
          -0.40670075830260716f,
143
55.7M
          -0.19574399372042872f,
144
55.7M
          0.0,
145
55.7M
          0.11379074460448091f,
146
55.7M
          -0.44444816619734384f,
147
55.7M
          -0.29291001369812636f,
148
55.7M
          -0.1137907446044814f,
149
55.7M
          0.0,
150
55.7M
      },
151
55.7M
      {
152
55.7M
          0.0,
153
55.7M
          0.0,
154
55.7M
          -0.21255748058288748f,
155
55.7M
          0.3085497062849767f,
156
55.7M
          0.0,
157
55.7M
          0.4706702258572536f,
158
55.7M
          -0.1621205195722993f,
159
55.7M
          0.0,
160
55.7M
          -0.21255748058287047f,
161
55.7M
          -0.16212051957228327f,
162
55.7M
          -0.47067022585725277f,
163
55.7M
          -0.1464291867126764f,
164
55.7M
          0.3085497062849487f,
165
55.7M
          0.0,
166
55.7M
          -0.14642918671266536f,
167
55.7M
          0.4251149611657548f,
168
55.7M
      },
169
55.7M
      {
170
55.7M
          0.0,
171
55.7M
          -0.7071067811865474f,
172
55.7M
          0.0,
173
55.7M
          0.0,
174
55.7M
          0.7071067811865476f,
175
55.7M
          0.0,
176
55.7M
          0.0,
177
55.7M
          0.0,
178
55.7M
          0.0,
179
55.7M
          0.0,
180
55.7M
          0.0,
181
55.7M
          0.0,
182
55.7M
          0.0,
183
55.7M
          0.0,
184
55.7M
          0.0,
185
55.7M
          0.0,
186
55.7M
      },
187
55.7M
      {
188
55.7M
          -0.4105377591765233f,
189
55.7M
          0.6235485373547691f,
190
55.7M
          -0.06435071657946274f,
191
55.7M
          -0.06435071657946266f,
192
55.7M
          0.6235485373547694f,
193
55.7M
          -0.06435071657946284f,
194
55.7M
          -0.0643507165794628f,
195
55.7M
          -0.06435071657946274f,
196
55.7M
          -0.06435071657946272f,
197
55.7M
          -0.06435071657946279f,
198
55.7M
          -0.06435071657946266f,
199
55.7M
          -0.06435071657946277f,
200
55.7M
          -0.06435071657946277f,
201
55.7M
          -0.06435071657946273f,
202
55.7M
          -0.06435071657946274f,
203
55.7M
          -0.0643507165794626f,
204
55.7M
      },
205
55.7M
      {
206
55.7M
          0.0,
207
55.7M
          0.0,
208
55.7M
          -0.4517556589999482f,
209
55.7M
          0.15854503551840063f,
210
55.7M
          0.0,
211
55.7M
          -0.04038515160822202f,
212
55.7M
          0.0074182263792423875f,
213
55.7M
          0.39351034269210167f,
214
55.7M
          -0.45175565899994635f,
215
55.7M
          0.007418226379244351f,
216
55.7M
          0.1107416575309343f,
217
55.7M
          0.08298163094882051f,
218
55.7M
          0.15854503551839705f,
219
55.7M
          0.3935103426921022f,
220
55.7M
          0.0829816309488214f,
221
55.7M
          -0.45175565899994796f,
222
55.7M
      },
223
55.7M
      {
224
55.7M
          0.0,
225
55.7M
          0.0,
226
55.7M
          -0.304684750724869f,
227
55.7M
          0.5112616136591823f,
228
55.7M
          0.0,
229
55.7M
          0.0,
230
55.7M
          -0.290480129728998f,
231
55.7M
          -0.06578701549142804f,
232
55.7M
          0.304684750724884f,
233
55.7M
          0.2904801297290076f,
234
55.7M
          0.0,
235
55.7M
          -0.23889773523344604f,
236
55.7M
          -0.5112616136592012f,
237
55.7M
          0.06578701549142545f,
238
55.7M
          0.23889773523345467f,
239
55.7M
          0.0,
240
55.7M
      },
241
55.7M
      {
242
55.7M
          0.0,
243
55.7M
          0.0,
244
55.7M
          0.3017929516615495f,
245
55.7M
          0.25792362796341184f,
246
55.7M
          0.0,
247
55.7M
          0.16272340142866204f,
248
55.7M
          0.09520022653475037f,
249
55.7M
          0.0,
250
55.7M
          0.3017929516615503f,
251
55.7M
          0.09520022653475055f,
252
55.7M
          -0.16272340142866173f,
253
55.7M
          -0.35312385449816297f,
254
55.7M
          0.25792362796341295f,
255
55.7M
          0.0,
256
55.7M
          -0.3531238544981624f,
257
55.7M
          -0.6035859033230976f,
258
55.7M
      },
259
55.7M
      {
260
55.7M
          0.0,
261
55.7M
          0.0,
262
55.7M
          0.40824829046386274f,
263
55.7M
          0.0,
264
55.7M
          0.0,
265
55.7M
          0.0,
266
55.7M
          0.0,
267
55.7M
          -0.4082482904638628f,
268
55.7M
          -0.4082482904638635f,
269
55.7M
          0.0,
270
55.7M
          0.0,
271
55.7M
          -0.40824829046386296f,
272
55.7M
          0.0,
273
55.7M
          0.4082482904638634f,
274
55.7M
          0.408248290463863f,
275
55.7M
          0.0,
276
55.7M
      },
277
55.7M
      {
278
55.7M
          0.0,
279
55.7M
          0.0,
280
55.7M
          0.1747866975480809f,
281
55.7M
          0.0812611176717539f,
282
55.7M
          0.0,
283
55.7M
          0.0,
284
55.7M
          -0.3675398009862027f,
285
55.7M
          -0.307882213957909f,
286
55.7M
          -0.17478669754808135f,
287
55.7M
          0.3675398009862011f,
288
55.7M
          0.0,
289
55.7M
          0.4826689115059883f,
290
55.7M
          -0.08126111767175039f,
291
55.7M
          0.30788221395790305f,
292
55.7M
          -0.48266891150598584f,
293
55.7M
          0.0,
294
55.7M
      },
295
55.7M
      {
296
55.7M
          0.0,
297
55.7M
          0.0,
298
55.7M
          -0.21105601049335784f,
299
55.7M
          0.18567180916109802f,
300
55.7M
          0.0,
301
55.7M
          0.0,
302
55.7M
          0.49215859013738733f,
303
55.7M
          -0.38525013709251915f,
304
55.7M
          0.21105601049335806f,
305
55.7M
          -0.49215859013738905f,
306
55.7M
          0.0,
307
55.7M
          0.17419412659916217f,
308
55.7M
          -0.18567180916109904f,
309
55.7M
          0.3852501370925211f,
310
55.7M
          -0.1741941265991621f,
311
55.7M
          0.0,
312
55.7M
      },
313
55.7M
      {
314
55.7M
          0.0,
315
55.7M
          0.0,
316
55.7M
          -0.14266084808807264f,
317
55.7M
          -0.3416446842253372f,
318
55.7M
          0.0,
319
55.7M
          0.7367497537172237f,
320
55.7M
          0.24627107722075148f,
321
55.7M
          -0.08574019035519306f,
322
55.7M
          -0.14266084808807344f,
323
55.7M
          0.24627107722075137f,
324
55.7M
          0.14883399227113567f,
325
55.7M
          -0.04768680350229251f,
326
55.7M
          -0.3416446842253373f,
327
55.7M
          -0.08574019035519267f,
328
55.7M
          -0.047686803502292804f,
329
55.7M
          -0.14266084808807242f,
330
55.7M
      },
331
55.7M
      {
332
55.7M
          0.0,
333
55.7M
          0.0,
334
55.7M
          -0.13813540350758585f,
335
55.7M
          0.3302282550303788f,
336
55.7M
          0.0,
337
55.7M
          0.08755115000587084f,
338
55.7M
          -0.07946706605909573f,
339
55.7M
          -0.4613374887461511f,
340
55.7M
          -0.13813540350758294f,
341
55.7M
          -0.07946706605910261f,
342
55.7M
          0.49724647109535086f,
343
55.7M
          0.12538059448563663f,
344
55.7M
          0.3302282550303805f,
345
55.7M
          -0.4613374887461554f,
346
55.7M
          0.12538059448564315f,
347
55.7M
          -0.13813540350758452f,
348
55.7M
      },
349
55.7M
      {
350
55.7M
          0.0,
351
55.7M
          0.0,
352
55.7M
          -0.17437602599651067f,
353
55.7M
          0.0702790691196284f,
354
55.7M
          0.0,
355
55.7M
          -0.2921026642334881f,
356
55.7M
          0.3623817333531167f,
357
55.7M
          0.0,
358
55.7M
          -0.1743760259965108f,
359
55.7M
          0.36238173335311646f,
360
55.7M
          0.29210266423348785f,
361
55.7M
          -0.4326608024727445f,
362
55.7M
          0.07027906911962818f,
363
55.7M
          0.0,
364
55.7M
          -0.4326608024727457f,
365
55.7M
          0.34875205199302267f,
366
55.7M
      },
367
55.7M
      {
368
55.7M
          0.0,
369
55.7M
          0.0,
370
55.7M
          0.11354987314994337f,
371
55.7M
          -0.07417504595810355f,
372
55.7M
          0.0,
373
55.7M
          0.19402893032594343f,
374
55.7M
          -0.435190496523228f,
375
55.7M
          0.21918684838857466f,
376
55.7M
          0.11354987314994257f,
377
55.7M
          -0.4351904965232251f,
378
55.7M
          0.5550443808910661f,
379
55.7M
          -0.25468277124066463f,
380
55.7M
          -0.07417504595810233f,
381
55.7M
          0.2191868483885728f,
382
55.7M
          -0.25468277124066413f,
383
55.7M
          0.1135498731499429f,
384
55.7M
      },
385
55.7M
  };
386
387
55.7M
  const HWY_CAPPED(float, 16) d;
388
167M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
111M
    auto pixel = Zero(d);
390
1.89G
    for (size_t j = 0; j < 16; j++) {
391
1.78G
      auto cf = Set(d, coeffs[j]);
392
1.78G
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
1.78G
      pixel = MulAdd(cf, basis, pixel);
394
1.78G
    }
395
111M
    Store(pixel, d, pixels + i);
396
111M
  }
397
55.7M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
Line
Count
Source
95
1.03M
void AFVIDCT4x4(const float* JXL_RESTRICT coeffs, float* JXL_RESTRICT pixels) {
96
1.03M
  HWY_ALIGN static constexpr float k4x4AFVBasis[16][16] = {
97
1.03M
      {
98
1.03M
          0.25,
99
1.03M
          0.25,
100
1.03M
          0.25,
101
1.03M
          0.25,
102
1.03M
          0.25,
103
1.03M
          0.25,
104
1.03M
          0.25,
105
1.03M
          0.25,
106
1.03M
          0.25,
107
1.03M
          0.25,
108
1.03M
          0.25,
109
1.03M
          0.25,
110
1.03M
          0.25,
111
1.03M
          0.25,
112
1.03M
          0.25,
113
1.03M
          0.25,
114
1.03M
      },
115
1.03M
      {
116
1.03M
          0.876902929799142f,
117
1.03M
          0.2206518106944235f,
118
1.03M
          -0.10140050393753763f,
119
1.03M
          -0.1014005039375375f,
120
1.03M
          0.2206518106944236f,
121
1.03M
          -0.10140050393753777f,
122
1.03M
          -0.10140050393753772f,
123
1.03M
          -0.10140050393753763f,
124
1.03M
          -0.10140050393753758f,
125
1.03M
          -0.10140050393753769f,
126
1.03M
          -0.1014005039375375f,
127
1.03M
          -0.10140050393753768f,
128
1.03M
          -0.10140050393753768f,
129
1.03M
          -0.10140050393753759f,
130
1.03M
          -0.10140050393753763f,
131
1.03M
          -0.10140050393753741f,
132
1.03M
      },
133
1.03M
      {
134
1.03M
          0.0,
135
1.03M
          0.0,
136
1.03M
          0.40670075830260755f,
137
1.03M
          0.44444816619734445f,
138
1.03M
          0.0,
139
1.03M
          0.0,
140
1.03M
          0.19574399372042936f,
141
1.03M
          0.2929100136981264f,
142
1.03M
          -0.40670075830260716f,
143
1.03M
          -0.19574399372042872f,
144
1.03M
          0.0,
145
1.03M
          0.11379074460448091f,
146
1.03M
          -0.44444816619734384f,
147
1.03M
          -0.29291001369812636f,
148
1.03M
          -0.1137907446044814f,
149
1.03M
          0.0,
150
1.03M
      },
151
1.03M
      {
152
1.03M
          0.0,
153
1.03M
          0.0,
154
1.03M
          -0.21255748058288748f,
155
1.03M
          0.3085497062849767f,
156
1.03M
          0.0,
157
1.03M
          0.4706702258572536f,
158
1.03M
          -0.1621205195722993f,
159
1.03M
          0.0,
160
1.03M
          -0.21255748058287047f,
161
1.03M
          -0.16212051957228327f,
162
1.03M
          -0.47067022585725277f,
163
1.03M
          -0.1464291867126764f,
164
1.03M
          0.3085497062849487f,
165
1.03M
          0.0,
166
1.03M
          -0.14642918671266536f,
167
1.03M
          0.4251149611657548f,
168
1.03M
      },
169
1.03M
      {
170
1.03M
          0.0,
171
1.03M
          -0.7071067811865474f,
172
1.03M
          0.0,
173
1.03M
          0.0,
174
1.03M
          0.7071067811865476f,
175
1.03M
          0.0,
176
1.03M
          0.0,
177
1.03M
          0.0,
178
1.03M
          0.0,
179
1.03M
          0.0,
180
1.03M
          0.0,
181
1.03M
          0.0,
182
1.03M
          0.0,
183
1.03M
          0.0,
184
1.03M
          0.0,
185
1.03M
          0.0,
186
1.03M
      },
187
1.03M
      {
188
1.03M
          -0.4105377591765233f,
189
1.03M
          0.6235485373547691f,
190
1.03M
          -0.06435071657946274f,
191
1.03M
          -0.06435071657946266f,
192
1.03M
          0.6235485373547694f,
193
1.03M
          -0.06435071657946284f,
194
1.03M
          -0.0643507165794628f,
195
1.03M
          -0.06435071657946274f,
196
1.03M
          -0.06435071657946272f,
197
1.03M
          -0.06435071657946279f,
198
1.03M
          -0.06435071657946266f,
199
1.03M
          -0.06435071657946277f,
200
1.03M
          -0.06435071657946277f,
201
1.03M
          -0.06435071657946273f,
202
1.03M
          -0.06435071657946274f,
203
1.03M
          -0.0643507165794626f,
204
1.03M
      },
205
1.03M
      {
206
1.03M
          0.0,
207
1.03M
          0.0,
208
1.03M
          -0.4517556589999482f,
209
1.03M
          0.15854503551840063f,
210
1.03M
          0.0,
211
1.03M
          -0.04038515160822202f,
212
1.03M
          0.0074182263792423875f,
213
1.03M
          0.39351034269210167f,
214
1.03M
          -0.45175565899994635f,
215
1.03M
          0.007418226379244351f,
216
1.03M
          0.1107416575309343f,
217
1.03M
          0.08298163094882051f,
218
1.03M
          0.15854503551839705f,
219
1.03M
          0.3935103426921022f,
220
1.03M
          0.0829816309488214f,
221
1.03M
          -0.45175565899994796f,
222
1.03M
      },
223
1.03M
      {
224
1.03M
          0.0,
225
1.03M
          0.0,
226
1.03M
          -0.304684750724869f,
227
1.03M
          0.5112616136591823f,
228
1.03M
          0.0,
229
1.03M
          0.0,
230
1.03M
          -0.290480129728998f,
231
1.03M
          -0.06578701549142804f,
232
1.03M
          0.304684750724884f,
233
1.03M
          0.2904801297290076f,
234
1.03M
          0.0,
235
1.03M
          -0.23889773523344604f,
236
1.03M
          -0.5112616136592012f,
237
1.03M
          0.06578701549142545f,
238
1.03M
          0.23889773523345467f,
239
1.03M
          0.0,
240
1.03M
      },
241
1.03M
      {
242
1.03M
          0.0,
243
1.03M
          0.0,
244
1.03M
          0.3017929516615495f,
245
1.03M
          0.25792362796341184f,
246
1.03M
          0.0,
247
1.03M
          0.16272340142866204f,
248
1.03M
          0.09520022653475037f,
249
1.03M
          0.0,
250
1.03M
          0.3017929516615503f,
251
1.03M
          0.09520022653475055f,
252
1.03M
          -0.16272340142866173f,
253
1.03M
          -0.35312385449816297f,
254
1.03M
          0.25792362796341295f,
255
1.03M
          0.0,
256
1.03M
          -0.3531238544981624f,
257
1.03M
          -0.6035859033230976f,
258
1.03M
      },
259
1.03M
      {
260
1.03M
          0.0,
261
1.03M
          0.0,
262
1.03M
          0.40824829046386274f,
263
1.03M
          0.0,
264
1.03M
          0.0,
265
1.03M
          0.0,
266
1.03M
          0.0,
267
1.03M
          -0.4082482904638628f,
268
1.03M
          -0.4082482904638635f,
269
1.03M
          0.0,
270
1.03M
          0.0,
271
1.03M
          -0.40824829046386296f,
272
1.03M
          0.0,
273
1.03M
          0.4082482904638634f,
274
1.03M
          0.408248290463863f,
275
1.03M
          0.0,
276
1.03M
      },
277
1.03M
      {
278
1.03M
          0.0,
279
1.03M
          0.0,
280
1.03M
          0.1747866975480809f,
281
1.03M
          0.0812611176717539f,
282
1.03M
          0.0,
283
1.03M
          0.0,
284
1.03M
          -0.3675398009862027f,
285
1.03M
          -0.307882213957909f,
286
1.03M
          -0.17478669754808135f,
287
1.03M
          0.3675398009862011f,
288
1.03M
          0.0,
289
1.03M
          0.4826689115059883f,
290
1.03M
          -0.08126111767175039f,
291
1.03M
          0.30788221395790305f,
292
1.03M
          -0.48266891150598584f,
293
1.03M
          0.0,
294
1.03M
      },
295
1.03M
      {
296
1.03M
          0.0,
297
1.03M
          0.0,
298
1.03M
          -0.21105601049335784f,
299
1.03M
          0.18567180916109802f,
300
1.03M
          0.0,
301
1.03M
          0.0,
302
1.03M
          0.49215859013738733f,
303
1.03M
          -0.38525013709251915f,
304
1.03M
          0.21105601049335806f,
305
1.03M
          -0.49215859013738905f,
306
1.03M
          0.0,
307
1.03M
          0.17419412659916217f,
308
1.03M
          -0.18567180916109904f,
309
1.03M
          0.3852501370925211f,
310
1.03M
          -0.1741941265991621f,
311
1.03M
          0.0,
312
1.03M
      },
313
1.03M
      {
314
1.03M
          0.0,
315
1.03M
          0.0,
316
1.03M
          -0.14266084808807264f,
317
1.03M
          -0.3416446842253372f,
318
1.03M
          0.0,
319
1.03M
          0.7367497537172237f,
320
1.03M
          0.24627107722075148f,
321
1.03M
          -0.08574019035519306f,
322
1.03M
          -0.14266084808807344f,
323
1.03M
          0.24627107722075137f,
324
1.03M
          0.14883399227113567f,
325
1.03M
          -0.04768680350229251f,
326
1.03M
          -0.3416446842253373f,
327
1.03M
          -0.08574019035519267f,
328
1.03M
          -0.047686803502292804f,
329
1.03M
          -0.14266084808807242f,
330
1.03M
      },
331
1.03M
      {
332
1.03M
          0.0,
333
1.03M
          0.0,
334
1.03M
          -0.13813540350758585f,
335
1.03M
          0.3302282550303788f,
336
1.03M
          0.0,
337
1.03M
          0.08755115000587084f,
338
1.03M
          -0.07946706605909573f,
339
1.03M
          -0.4613374887461511f,
340
1.03M
          -0.13813540350758294f,
341
1.03M
          -0.07946706605910261f,
342
1.03M
          0.49724647109535086f,
343
1.03M
          0.12538059448563663f,
344
1.03M
          0.3302282550303805f,
345
1.03M
          -0.4613374887461554f,
346
1.03M
          0.12538059448564315f,
347
1.03M
          -0.13813540350758452f,
348
1.03M
      },
349
1.03M
      {
350
1.03M
          0.0,
351
1.03M
          0.0,
352
1.03M
          -0.17437602599651067f,
353
1.03M
          0.0702790691196284f,
354
1.03M
          0.0,
355
1.03M
          -0.2921026642334881f,
356
1.03M
          0.3623817333531167f,
357
1.03M
          0.0,
358
1.03M
          -0.1743760259965108f,
359
1.03M
          0.36238173335311646f,
360
1.03M
          0.29210266423348785f,
361
1.03M
          -0.4326608024727445f,
362
1.03M
          0.07027906911962818f,
363
1.03M
          0.0,
364
1.03M
          -0.4326608024727457f,
365
1.03M
          0.34875205199302267f,
366
1.03M
      },
367
1.03M
      {
368
1.03M
          0.0,
369
1.03M
          0.0,
370
1.03M
          0.11354987314994337f,
371
1.03M
          -0.07417504595810355f,
372
1.03M
          0.0,
373
1.03M
          0.19402893032594343f,
374
1.03M
          -0.435190496523228f,
375
1.03M
          0.21918684838857466f,
376
1.03M
          0.11354987314994257f,
377
1.03M
          -0.4351904965232251f,
378
1.03M
          0.5550443808910661f,
379
1.03M
          -0.25468277124066463f,
380
1.03M
          -0.07417504595810233f,
381
1.03M
          0.2191868483885728f,
382
1.03M
          -0.25468277124066413f,
383
1.03M
          0.1135498731499429f,
384
1.03M
      },
385
1.03M
  };
386
387
1.03M
  const HWY_CAPPED(float, 16) d;
388
3.11M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
389
2.07M
    auto pixel = Zero(d);
390
35.2M
    for (size_t j = 0; j < 16; j++) {
391
33.1M
      auto cf = Set(d, coeffs[j]);
392
33.1M
      auto basis = Load(d, k4x4AFVBasis[j] + i);
393
33.1M
      pixel = MulAdd(cf, basis, pixel);
394
33.1M
    }
395
2.07M
    Store(pixel, d, pixels + i);
396
2.07M
  }
397
1.03M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVIDCT4x4(float const*, float*)
398
399
template <size_t afv_kind>
400
void AFVTransformToPixels(const float* JXL_RESTRICT coefficients,
401
56.8M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
56.8M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
56.8M
  size_t afv_x = afv_kind & 1;
404
56.8M
  size_t afv_y = afv_kind / 2;
405
56.8M
  float dcs[3] = {};
406
56.8M
  float block00 = coefficients[0];
407
56.8M
  float block01 = coefficients[1];
408
56.8M
  float block10 = coefficients[8];
409
56.8M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
56.8M
  dcs[1] = (block00 + block10 - block01);
411
56.8M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
56.8M
  HWY_ALIGN float coeff[4 * 4];
414
56.8M
  coeff[0] = dcs[0];
415
284M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.13G
    for (size_t ix = 0; ix < 4; ix++) {
417
908M
      if (ix == 0 && iy == 0) continue;
418
852M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
852M
    }
420
227M
  }
421
56.8M
  HWY_ALIGN float block[4 * 8];
422
56.8M
  AFVIDCT4x4(coeff, block);
423
284M
  for (size_t iy = 0; iy < 4; iy++) {
424
1.13G
    for (size_t ix = 0; ix < 4; ix++) {
425
908M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
908M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
908M
    }
428
227M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
56.8M
  block[0] = dcs[1];
431
284M
  for (size_t iy = 0; iy < 4; iy++) {
432
1.13G
    for (size_t ix = 0; ix < 4; ix++) {
433
908M
      if (ix == 0 && iy == 0) continue;
434
852M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
852M
    }
436
227M
  }
437
56.8M
  ComputeScaledIDCT<4, 4>()(
438
56.8M
      block,
439
56.8M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
56.8M
            pixels_stride),
441
56.8M
      scratch_space);
442
  // IDCT4x8.
443
56.8M
  block[0] = dcs[2];
444
284M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.04G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.81G
      if (ix == 0 && iy == 0) continue;
447
1.76G
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
1.76G
    }
449
227M
  }
450
56.8M
  ComputeScaledIDCT<4, 8>()(
451
56.8M
      block,
452
56.8M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
56.8M
      scratch_space);
454
56.8M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.9M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.9M
  size_t afv_x = afv_kind & 1;
404
13.9M
  size_t afv_y = afv_kind / 2;
405
13.9M
  float dcs[3] = {};
406
13.9M
  float block00 = coefficients[0];
407
13.9M
  float block01 = coefficients[1];
408
13.9M
  float block10 = coefficients[8];
409
13.9M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.9M
  dcs[1] = (block00 + block10 - block01);
411
13.9M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.9M
  HWY_ALIGN float coeff[4 * 4];
414
13.9M
  coeff[0] = dcs[0];
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      if (ix == 0 && iy == 0) continue;
418
209M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
209M
    }
420
55.7M
  }
421
13.9M
  HWY_ALIGN float block[4 * 8];
422
13.9M
  AFVIDCT4x4(coeff, block);
423
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
424
278M
    for (size_t ix = 0; ix < 4; ix++) {
425
223M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
223M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
223M
    }
428
55.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.9M
  block[0] = dcs[1];
431
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
432
278M
    for (size_t ix = 0; ix < 4; ix++) {
433
223M
      if (ix == 0 && iy == 0) continue;
434
209M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
209M
    }
436
55.7M
  }
437
13.9M
  ComputeScaledIDCT<4, 4>()(
438
13.9M
      block,
439
13.9M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.9M
            pixels_stride),
441
13.9M
      scratch_space);
442
  // IDCT4x8.
443
13.9M
  block[0] = dcs[2];
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      if (ix == 0 && iy == 0) continue;
447
432M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
432M
    }
449
55.7M
  }
450
13.9M
  ComputeScaledIDCT<4, 8>()(
451
13.9M
      block,
452
13.9M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.9M
      scratch_space);
454
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.9M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.9M
  size_t afv_x = afv_kind & 1;
404
13.9M
  size_t afv_y = afv_kind / 2;
405
13.9M
  float dcs[3] = {};
406
13.9M
  float block00 = coefficients[0];
407
13.9M
  float block01 = coefficients[1];
408
13.9M
  float block10 = coefficients[8];
409
13.9M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.9M
  dcs[1] = (block00 + block10 - block01);
411
13.9M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.9M
  HWY_ALIGN float coeff[4 * 4];
414
13.9M
  coeff[0] = dcs[0];
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      if (ix == 0 && iy == 0) continue;
418
209M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
209M
    }
420
55.7M
  }
421
13.9M
  HWY_ALIGN float block[4 * 8];
422
13.9M
  AFVIDCT4x4(coeff, block);
423
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
424
278M
    for (size_t ix = 0; ix < 4; ix++) {
425
223M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
223M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
223M
    }
428
55.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.9M
  block[0] = dcs[1];
431
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
432
278M
    for (size_t ix = 0; ix < 4; ix++) {
433
223M
      if (ix == 0 && iy == 0) continue;
434
209M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
209M
    }
436
55.7M
  }
437
13.9M
  ComputeScaledIDCT<4, 4>()(
438
13.9M
      block,
439
13.9M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.9M
            pixels_stride),
441
13.9M
      scratch_space);
442
  // IDCT4x8.
443
13.9M
  block[0] = dcs[2];
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      if (ix == 0 && iy == 0) continue;
447
432M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
432M
    }
449
55.7M
  }
450
13.9M
  ComputeScaledIDCT<4, 8>()(
451
13.9M
      block,
452
13.9M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.9M
      scratch_space);
454
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.9M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.9M
  size_t afv_x = afv_kind & 1;
404
13.9M
  size_t afv_y = afv_kind / 2;
405
13.9M
  float dcs[3] = {};
406
13.9M
  float block00 = coefficients[0];
407
13.9M
  float block01 = coefficients[1];
408
13.9M
  float block10 = coefficients[8];
409
13.9M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.9M
  dcs[1] = (block00 + block10 - block01);
411
13.9M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.9M
  HWY_ALIGN float coeff[4 * 4];
414
13.9M
  coeff[0] = dcs[0];
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      if (ix == 0 && iy == 0) continue;
418
209M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
209M
    }
420
55.7M
  }
421
13.9M
  HWY_ALIGN float block[4 * 8];
422
13.9M
  AFVIDCT4x4(coeff, block);
423
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
424
278M
    for (size_t ix = 0; ix < 4; ix++) {
425
223M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
223M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
223M
    }
428
55.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.9M
  block[0] = dcs[1];
431
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
432
278M
    for (size_t ix = 0; ix < 4; ix++) {
433
223M
      if (ix == 0 && iy == 0) continue;
434
209M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
209M
    }
436
55.7M
  }
437
13.9M
  ComputeScaledIDCT<4, 4>()(
438
13.9M
      block,
439
13.9M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.9M
            pixels_stride),
441
13.9M
      scratch_space);
442
  // IDCT4x8.
443
13.9M
  block[0] = dcs[2];
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      if (ix == 0 && iy == 0) continue;
447
432M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
432M
    }
449
55.7M
  }
450
13.9M
  ComputeScaledIDCT<4, 8>()(
451
13.9M
      block,
452
13.9M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.9M
      scratch_space);
454
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
13.9M
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
13.9M
  size_t afv_x = afv_kind & 1;
404
13.9M
  size_t afv_y = afv_kind / 2;
405
13.9M
  float dcs[3] = {};
406
13.9M
  float block00 = coefficients[0];
407
13.9M
  float block01 = coefficients[1];
408
13.9M
  float block10 = coefficients[8];
409
13.9M
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
13.9M
  dcs[1] = (block00 + block10 - block01);
411
13.9M
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
13.9M
  HWY_ALIGN float coeff[4 * 4];
414
13.9M
  coeff[0] = dcs[0];
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      if (ix == 0 && iy == 0) continue;
418
209M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
209M
    }
420
55.7M
  }
421
13.9M
  HWY_ALIGN float block[4 * 8];
422
13.9M
  AFVIDCT4x4(coeff, block);
423
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
424
278M
    for (size_t ix = 0; ix < 4; ix++) {
425
223M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
223M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
223M
    }
428
55.7M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
13.9M
  block[0] = dcs[1];
431
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
432
278M
    for (size_t ix = 0; ix < 4; ix++) {
433
223M
      if (ix == 0 && iy == 0) continue;
434
209M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
209M
    }
436
55.7M
  }
437
13.9M
  ComputeScaledIDCT<4, 4>()(
438
13.9M
      block,
439
13.9M
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
13.9M
            pixels_stride),
441
13.9M
      scratch_space);
442
  // IDCT4x8.
443
13.9M
  block[0] = dcs[2];
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      if (ix == 0 && iy == 0) continue;
447
432M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
432M
    }
449
55.7M
  }
450
13.9M
  ComputeScaledIDCT<4, 8>()(
451
13.9M
      block,
452
13.9M
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
13.9M
      scratch_space);
454
13.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Line
Count
Source
401
347k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
347k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
347k
  size_t afv_x = afv_kind & 1;
404
347k
  size_t afv_y = afv_kind / 2;
405
347k
  float dcs[3] = {};
406
347k
  float block00 = coefficients[0];
407
347k
  float block01 = coefficients[1];
408
347k
  float block10 = coefficients[8];
409
347k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
347k
  dcs[1] = (block00 + block10 - block01);
411
347k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
347k
  HWY_ALIGN float coeff[4 * 4];
414
347k
  coeff[0] = dcs[0];
415
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.95M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.56M
      if (ix == 0 && iy == 0) continue;
418
5.21M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
5.21M
    }
420
1.39M
  }
421
347k
  HWY_ALIGN float block[4 * 8];
422
347k
  AFVIDCT4x4(coeff, block);
423
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
424
6.95M
    for (size_t ix = 0; ix < 4; ix++) {
425
5.56M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
5.56M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
5.56M
    }
428
1.39M
  }
429
  // IDCT4x4 in (odd, even) positions.
430
347k
  block[0] = dcs[1];
431
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
432
6.95M
    for (size_t ix = 0; ix < 4; ix++) {
433
5.56M
      if (ix == 0 && iy == 0) continue;
434
5.21M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
5.21M
    }
436
1.39M
  }
437
347k
  ComputeScaledIDCT<4, 4>()(
438
347k
      block,
439
347k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
347k
            pixels_stride),
441
347k
      scratch_space);
442
  // IDCT4x8.
443
347k
  block[0] = dcs[2];
444
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
445
12.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.1M
      if (ix == 0 && iy == 0) continue;
447
10.7M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
10.7M
    }
449
1.39M
  }
450
347k
  ComputeScaledIDCT<4, 8>()(
451
347k
      block,
452
347k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
347k
      scratch_space);
454
347k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Line
Count
Source
401
193k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
193k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
193k
  size_t afv_x = afv_kind & 1;
404
193k
  size_t afv_y = afv_kind / 2;
405
193k
  float dcs[3] = {};
406
193k
  float block00 = coefficients[0];
407
193k
  float block01 = coefficients[1];
408
193k
  float block10 = coefficients[8];
409
193k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
193k
  dcs[1] = (block00 + block10 - block01);
411
193k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
193k
  HWY_ALIGN float coeff[4 * 4];
414
193k
  coeff[0] = dcs[0];
415
968k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.09M
      if (ix == 0 && iy == 0) continue;
418
2.90M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
2.90M
    }
420
774k
  }
421
193k
  HWY_ALIGN float block[4 * 8];
422
193k
  AFVIDCT4x4(coeff, block);
423
968k
  for (size_t iy = 0; iy < 4; iy++) {
424
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.09M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.09M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.09M
    }
428
774k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
193k
  block[0] = dcs[1];
431
968k
  for (size_t iy = 0; iy < 4; iy++) {
432
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.09M
      if (ix == 0 && iy == 0) continue;
434
2.90M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
2.90M
    }
436
774k
  }
437
193k
  ComputeScaledIDCT<4, 4>()(
438
193k
      block,
439
193k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
193k
            pixels_stride),
441
193k
      scratch_space);
442
  // IDCT4x8.
443
193k
  block[0] = dcs[2];
444
968k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.96M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.19M
      if (ix == 0 && iy == 0) continue;
447
6.00M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
6.00M
    }
449
774k
  }
450
193k
  ComputeScaledIDCT<4, 8>()(
451
193k
      block,
452
193k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
193k
      scratch_space);
454
193k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Line
Count
Source
401
245k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
245k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
245k
  size_t afv_x = afv_kind & 1;
404
245k
  size_t afv_y = afv_kind / 2;
405
245k
  float dcs[3] = {};
406
245k
  float block00 = coefficients[0];
407
245k
  float block01 = coefficients[1];
408
245k
  float block10 = coefficients[8];
409
245k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
245k
  dcs[1] = (block00 + block10 - block01);
411
245k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
245k
  HWY_ALIGN float coeff[4 * 4];
414
245k
  coeff[0] = dcs[0];
415
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.91M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.93M
      if (ix == 0 && iy == 0) continue;
418
3.68M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.68M
    }
420
982k
  }
421
245k
  HWY_ALIGN float block[4 * 8];
422
245k
  AFVIDCT4x4(coeff, block);
423
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.91M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.93M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.93M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.93M
    }
428
982k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
245k
  block[0] = dcs[1];
431
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.91M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.93M
      if (ix == 0 && iy == 0) continue;
434
3.68M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.68M
    }
436
982k
  }
437
245k
  ComputeScaledIDCT<4, 4>()(
438
245k
      block,
439
245k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
245k
            pixels_stride),
441
245k
      scratch_space);
442
  // IDCT4x8.
443
245k
  block[0] = dcs[2];
444
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.84M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.86M
      if (ix == 0 && iy == 0) continue;
447
7.61M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.61M
    }
449
982k
  }
450
245k
  ComputeScaledIDCT<4, 8>()(
451
245k
      block,
452
245k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
245k
      scratch_space);
454
245k
}
dec_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
Line
Count
Source
401
249k
                          float* JXL_RESTRICT pixels, size_t pixels_stride) {
402
249k
  HWY_ALIGN float scratch_space[4 * 8 * 4];
403
249k
  size_t afv_x = afv_kind & 1;
404
249k
  size_t afv_y = afv_kind / 2;
405
249k
  float dcs[3] = {};
406
249k
  float block00 = coefficients[0];
407
249k
  float block01 = coefficients[1];
408
249k
  float block10 = coefficients[8];
409
249k
  dcs[0] = (block00 + block10 + block01) * 4.0f;
410
249k
  dcs[1] = (block00 + block10 - block01);
411
249k
  dcs[2] = block00 - block10;
412
  // IAFV: (even, even) positions.
413
249k
  HWY_ALIGN float coeff[4 * 4];
414
249k
  coeff[0] = dcs[0];
415
1.24M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.99M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.99M
      if (ix == 0 && iy == 0) continue;
418
3.74M
      coeff[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2];
419
3.74M
    }
420
998k
  }
421
249k
  HWY_ALIGN float block[4 * 8];
422
249k
  AFVIDCT4x4(coeff, block);
423
1.24M
  for (size_t iy = 0; iy < 4; iy++) {
424
4.99M
    for (size_t ix = 0; ix < 4; ix++) {
425
3.99M
      pixels[(iy + afv_y * 4) * pixels_stride + afv_x * 4 + ix] =
426
3.99M
          block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)];
427
3.99M
    }
428
998k
  }
429
  // IDCT4x4 in (odd, even) positions.
430
249k
  block[0] = dcs[1];
431
1.24M
  for (size_t iy = 0; iy < 4; iy++) {
432
4.99M
    for (size_t ix = 0; ix < 4; ix++) {
433
3.99M
      if (ix == 0 && iy == 0) continue;
434
3.74M
      block[iy * 4 + ix] = coefficients[iy * 2 * 8 + ix * 2 + 1];
435
3.74M
    }
436
998k
  }
437
249k
  ComputeScaledIDCT<4, 4>()(
438
249k
      block,
439
249k
      DCTTo(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
440
249k
            pixels_stride),
441
249k
      scratch_space);
442
  // IDCT4x8.
443
249k
  block[0] = dcs[2];
444
1.24M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.98M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.98M
      if (ix == 0 && iy == 0) continue;
447
7.73M
      block[iy * 8 + ix] = coefficients[(1 + iy * 2) * 8 + ix];
448
7.73M
    }
449
998k
  }
450
249k
  ComputeScaledIDCT<4, 8>()(
451
249k
      block,
452
249k
      DCTTo(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
453
249k
      scratch_space);
454
249k
}
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<0ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<1ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<2ul>(float const*, float*, unsigned long)
Unexecuted instantiation: dec_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformToPixels<3ul>(float const*, float*, unsigned long)
455
456
HWY_MAYBE_UNUSED void TransformToPixels(const AcStrategyType strategy,
457
                                        float* JXL_RESTRICT coefficients,
458
                                        float* JXL_RESTRICT pixels,
459
                                        size_t pixels_stride,
460
194M
                                        float* scratch_space) {
461
194M
  using Type = AcStrategyType;
462
194M
  switch (strategy) {
463
17.2M
    case Type::IDENTITY: {
464
17.2M
      float dcs[4] = {};
465
17.2M
      float block00 = coefficients[0];
466
17.2M
      float block01 = coefficients[1];
467
17.2M
      float block10 = coefficients[8];
468
17.2M
      float block11 = coefficients[9];
469
17.2M
      dcs[0] = block00 + block01 + block10 + block11;
470
17.2M
      dcs[1] = block00 + block01 - block10 - block11;
471
17.2M
      dcs[2] = block00 - block01 + block10 - block11;
472
17.2M
      dcs[3] = block00 - block01 - block10 + block11;
473
51.6M
      for (size_t y = 0; y < 2; y++) {
474
103M
        for (size_t x = 0; x < 2; x++) {
475
68.8M
          float block_dc = dcs[y * 2 + x];
476
68.8M
          float residual_sum = 0;
477
344M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.37G
            for (size_t ix = 0; ix < 4; ix++) {
479
1.10G
              if (ix == 0 && iy == 0) continue;
480
1.03G
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
1.03G
            }
482
275M
          }
483
68.8M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
68.8M
              block_dc - residual_sum * (1.0f / 16);
485
344M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.37G
            for (size_t ix = 0; ix < 4; ix++) {
487
1.10G
              if (ix == 1 && iy == 1) continue;
488
1.03G
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
1.03G
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
1.03G
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
1.03G
            }
492
275M
          }
493
68.8M
          pixels[y * 4 * pixels_stride + x * 4] =
494
68.8M
              coefficients[(y + 2) * 8 + x + 2] +
495
68.8M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
68.8M
        }
497
34.4M
      }
498
17.2M
      break;
499
0
    }
500
14.4M
    case Type::DCT8X4: {
501
14.4M
      float dcs[2] = {};
502
14.4M
      float block0 = coefficients[0];
503
14.4M
      float block1 = coefficients[8];
504
14.4M
      dcs[0] = block0 + block1;
505
14.4M
      dcs[1] = block0 - block1;
506
43.4M
      for (size_t x = 0; x < 2; x++) {
507
28.9M
        HWY_ALIGN float block[4 * 8];
508
28.9M
        block[0] = dcs[x];
509
144M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.04G
          for (size_t ix = 0; ix < 8; ix++) {
511
927M
            if (ix == 0 && iy == 0) continue;
512
898M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
898M
          }
514
115M
        }
515
28.9M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
28.9M
                                  scratch_space);
517
28.9M
      }
518
14.4M
      break;
519
0
    }
520
14.1M
    case Type::DCT4X8: {
521
14.1M
      float dcs[2] = {};
522
14.1M
      float block0 = coefficients[0];
523
14.1M
      float block1 = coefficients[8];
524
14.1M
      dcs[0] = block0 + block1;
525
14.1M
      dcs[1] = block0 - block1;
526
42.5M
      for (size_t y = 0; y < 2; y++) {
527
28.3M
        HWY_ALIGN float block[4 * 8];
528
28.3M
        block[0] = dcs[y];
529
141M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.02G
          for (size_t ix = 0; ix < 8; ix++) {
531
907M
            if (ix == 0 && iy == 0) continue;
532
879M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
879M
          }
534
113M
        }
535
28.3M
        ComputeScaledIDCT<4, 8>()(
536
28.3M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
28.3M
            scratch_space);
538
28.3M
      }
539
14.1M
      break;
540
0
    }
541
13.9M
    case Type::DCT4X4: {
542
13.9M
      float dcs[4] = {};
543
13.9M
      float block00 = coefficients[0];
544
13.9M
      float block01 = coefficients[1];
545
13.9M
      float block10 = coefficients[8];
546
13.9M
      float block11 = coefficients[9];
547
13.9M
      dcs[0] = block00 + block01 + block10 + block11;
548
13.9M
      dcs[1] = block00 + block01 - block10 - block11;
549
13.9M
      dcs[2] = block00 - block01 + block10 - block11;
550
13.9M
      dcs[3] = block00 - block01 - block10 + block11;
551
41.8M
      for (size_t y = 0; y < 2; y++) {
552
83.6M
        for (size_t x = 0; x < 2; x++) {
553
55.7M
          HWY_ALIGN float block[4 * 4];
554
55.7M
          block[0] = dcs[y * 2 + x];
555
278M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
557
892M
              if (ix == 0 && iy == 0) continue;
558
836M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
836M
            }
560
223M
          }
561
55.7M
          ComputeScaledIDCT<4, 4>()(
562
55.7M
              block,
563
55.7M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
55.7M
              scratch_space);
565
55.7M
        }
566
27.8M
      }
567
13.9M
      break;
568
0
    }
569
20.7M
    case Type::DCT2X2: {
570
20.7M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
20.7M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
20.7M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
20.7M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
20.7M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
186M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.49G
        for (size_t x = 0; x < kBlockDim; x++) {
577
1.32G
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
1.32G
        }
579
165M
      }
580
20.7M
      break;
581
0
    }
582
6.24M
    case Type::DCT16X16: {
583
6.24M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
6.24M
                                  scratch_space);
585
6.24M
      break;
586
0
    }
587
12.0M
    case Type::DCT16X8: {
588
12.0M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
12.0M
                                 scratch_space);
590
12.0M
      break;
591
0
    }
592
12.0M
    case Type::DCT8X16: {
593
12.0M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
12.0M
                                 scratch_space);
595
12.0M
      break;
596
0
    }
597
708
    case Type::DCT32X8: {
598
708
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
708
                                 scratch_space);
600
708
      break;
601
0
    }
602
108
    case Type::DCT8X32: {
603
108
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
108
                                 scratch_space);
605
108
      break;
606
0
    }
607
2.38M
    case Type::DCT32X16: {
608
2.38M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.38M
                                  scratch_space);
610
2.38M
      break;
611
0
    }
612
2.36M
    case Type::DCT16X32: {
613
2.36M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.36M
                                  scratch_space);
615
2.36M
      break;
616
0
    }
617
1.42M
    case Type::DCT32X32: {
618
1.42M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.42M
                                  scratch_space);
620
1.42M
      break;
621
0
    }
622
19.5M
    case Type::DCT: {
623
19.5M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
19.5M
                                scratch_space);
625
19.5M
      break;
626
0
    }
627
14.2M
    case Type::AFV0: {
628
14.2M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
14.2M
      break;
630
0
    }
631
14.1M
    case Type::AFV1: {
632
14.1M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
14.1M
      break;
634
0
    }
635
14.1M
    case Type::AFV2: {
636
14.1M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
14.1M
      break;
638
0
    }
639
14.1M
    case Type::AFV3: {
640
14.1M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
14.1M
      break;
642
0
    }
643
723k
    case Type::DCT64X32: {
644
723k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
723k
                                  scratch_space);
646
723k
      break;
647
0
    }
648
432k
    case Type::DCT32X64: {
649
432k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
432k
                                  scratch_space);
651
432k
      break;
652
0
    }
653
356k
    case Type::DCT64X64: {
654
356k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
356k
                                  scratch_space);
656
356k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
194M
  }
689
194M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
174M
                                        float* scratch_space) {
461
174M
  using Type = AcStrategyType;
462
174M
  switch (strategy) {
463
13.9M
    case Type::IDENTITY: {
464
13.9M
      float dcs[4] = {};
465
13.9M
      float block00 = coefficients[0];
466
13.9M
      float block01 = coefficients[1];
467
13.9M
      float block10 = coefficients[8];
468
13.9M
      float block11 = coefficients[9];
469
13.9M
      dcs[0] = block00 + block01 + block10 + block11;
470
13.9M
      dcs[1] = block00 + block01 - block10 - block11;
471
13.9M
      dcs[2] = block00 - block01 + block10 - block11;
472
13.9M
      dcs[3] = block00 - block01 - block10 + block11;
473
41.8M
      for (size_t y = 0; y < 2; y++) {
474
83.6M
        for (size_t x = 0; x < 2; x++) {
475
55.7M
          float block_dc = dcs[y * 2 + x];
476
55.7M
          float residual_sum = 0;
477
278M
          for (size_t iy = 0; iy < 4; iy++) {
478
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
479
892M
              if (ix == 0 && iy == 0) continue;
480
836M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
836M
            }
482
223M
          }
483
55.7M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
55.7M
              block_dc - residual_sum * (1.0f / 16);
485
278M
          for (size_t iy = 0; iy < 4; iy++) {
486
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
487
892M
              if (ix == 1 && iy == 1) continue;
488
836M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
836M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
836M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
836M
            }
492
223M
          }
493
55.7M
          pixels[y * 4 * pixels_stride + x * 4] =
494
55.7M
              coefficients[(y + 2) * 8 + x + 2] +
495
55.7M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
55.7M
        }
497
27.8M
      }
498
13.9M
      break;
499
0
    }
500
13.9M
    case Type::DCT8X4: {
501
13.9M
      float dcs[2] = {};
502
13.9M
      float block0 = coefficients[0];
503
13.9M
      float block1 = coefficients[8];
504
13.9M
      dcs[0] = block0 + block1;
505
13.9M
      dcs[1] = block0 - block1;
506
41.8M
      for (size_t x = 0; x < 2; x++) {
507
27.8M
        HWY_ALIGN float block[4 * 8];
508
27.8M
        block[0] = dcs[x];
509
139M
        for (size_t iy = 0; iy < 4; iy++) {
510
1.00G
          for (size_t ix = 0; ix < 8; ix++) {
511
892M
            if (ix == 0 && iy == 0) continue;
512
864M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
864M
          }
514
111M
        }
515
27.8M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
27.8M
                                  scratch_space);
517
27.8M
      }
518
13.9M
      break;
519
0
    }
520
13.9M
    case Type::DCT4X8: {
521
13.9M
      float dcs[2] = {};
522
13.9M
      float block0 = coefficients[0];
523
13.9M
      float block1 = coefficients[8];
524
13.9M
      dcs[0] = block0 + block1;
525
13.9M
      dcs[1] = block0 - block1;
526
41.8M
      for (size_t y = 0; y < 2; y++) {
527
27.8M
        HWY_ALIGN float block[4 * 8];
528
27.8M
        block[0] = dcs[y];
529
139M
        for (size_t iy = 0; iy < 4; iy++) {
530
1.00G
          for (size_t ix = 0; ix < 8; ix++) {
531
892M
            if (ix == 0 && iy == 0) continue;
532
864M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
864M
          }
534
111M
        }
535
27.8M
        ComputeScaledIDCT<4, 8>()(
536
27.8M
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
27.8M
            scratch_space);
538
27.8M
      }
539
13.9M
      break;
540
0
    }
541
13.9M
    case Type::DCT4X4: {
542
13.9M
      float dcs[4] = {};
543
13.9M
      float block00 = coefficients[0];
544
13.9M
      float block01 = coefficients[1];
545
13.9M
      float block10 = coefficients[8];
546
13.9M
      float block11 = coefficients[9];
547
13.9M
      dcs[0] = block00 + block01 + block10 + block11;
548
13.9M
      dcs[1] = block00 + block01 - block10 - block11;
549
13.9M
      dcs[2] = block00 - block01 + block10 - block11;
550
13.9M
      dcs[3] = block00 - block01 - block10 + block11;
551
41.8M
      for (size_t y = 0; y < 2; y++) {
552
83.6M
        for (size_t x = 0; x < 2; x++) {
553
55.7M
          HWY_ALIGN float block[4 * 4];
554
55.7M
          block[0] = dcs[y * 2 + x];
555
278M
          for (size_t iy = 0; iy < 4; iy++) {
556
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
557
892M
              if (ix == 0 && iy == 0) continue;
558
836M
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
836M
            }
560
223M
          }
561
55.7M
          ComputeScaledIDCT<4, 4>()(
562
55.7M
              block,
563
55.7M
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
55.7M
              scratch_space);
565
55.7M
        }
566
27.8M
      }
567
13.9M
      break;
568
0
    }
569
13.9M
    case Type::DCT2X2: {
570
13.9M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
13.9M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
13.9M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
13.9M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
13.9M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
125M
      for (size_t y = 0; y < kBlockDim; y++) {
576
1.00G
        for (size_t x = 0; x < kBlockDim; x++) {
577
892M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
892M
        }
579
111M
      }
580
13.9M
      break;
581
0
    }
582
5.69M
    case Type::DCT16X16: {
583
5.69M
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
5.69M
                                  scratch_space);
585
5.69M
      break;
586
0
    }
587
11.2M
    case Type::DCT16X8: {
588
11.2M
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
11.2M
                                 scratch_space);
590
11.2M
      break;
591
0
    }
592
11.2M
    case Type::DCT8X16: {
593
11.2M
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
11.2M
                                 scratch_space);
595
11.2M
      break;
596
0
    }
597
0
    case Type::DCT32X8: {
598
0
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
0
                                 scratch_space);
600
0
      break;
601
0
    }
602
0
    case Type::DCT8X32: {
603
0
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
0
                                 scratch_space);
605
0
      break;
606
0
    }
607
2.21M
    case Type::DCT32X16: {
608
2.21M
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
2.21M
                                  scratch_space);
610
2.21M
      break;
611
0
    }
612
2.19M
    case Type::DCT16X32: {
613
2.19M
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
2.19M
                                  scratch_space);
615
2.19M
      break;
616
0
    }
617
1.12M
    case Type::DCT32X32: {
618
1.12M
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
1.12M
                                  scratch_space);
620
1.12M
      break;
621
0
    }
622
13.9M
    case Type::DCT: {
623
13.9M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
13.9M
                                scratch_space);
625
13.9M
      break;
626
0
    }
627
13.9M
    case Type::AFV0: {
628
13.9M
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
13.9M
      break;
630
0
    }
631
13.9M
    case Type::AFV1: {
632
13.9M
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
13.9M
      break;
634
0
    }
635
13.9M
    case Type::AFV2: {
636
13.9M
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
13.9M
      break;
638
0
    }
639
13.9M
    case Type::AFV3: {
640
13.9M
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
13.9M
      break;
642
0
    }
643
674k
    case Type::DCT64X32: {
644
674k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
674k
                                  scratch_space);
646
674k
      break;
647
0
    }
648
412k
    case Type::DCT32X64: {
649
412k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
412k
                                  scratch_space);
651
412k
      break;
652
0
    }
653
192k
    case Type::DCT64X64: {
654
192k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
192k
                                  scratch_space);
656
192k
      break;
657
0
    }
658
0
    case Type::DCT128X64: {
659
0
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT64X128: {
664
0
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
0
    case Type::DCT128X128: {
669
0
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
0
                                    scratch_space);
671
0
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
174M
  }
689
174M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
Line
Count
Source
460
20.5M
                                        float* scratch_space) {
461
20.5M
  using Type = AcStrategyType;
462
20.5M
  switch (strategy) {
463
3.27M
    case Type::IDENTITY: {
464
3.27M
      float dcs[4] = {};
465
3.27M
      float block00 = coefficients[0];
466
3.27M
      float block01 = coefficients[1];
467
3.27M
      float block10 = coefficients[8];
468
3.27M
      float block11 = coefficients[9];
469
3.27M
      dcs[0] = block00 + block01 + block10 + block11;
470
3.27M
      dcs[1] = block00 + block01 - block10 - block11;
471
3.27M
      dcs[2] = block00 - block01 + block10 - block11;
472
3.27M
      dcs[3] = block00 - block01 - block10 + block11;
473
9.82M
      for (size_t y = 0; y < 2; y++) {
474
19.6M
        for (size_t x = 0; x < 2; x++) {
475
13.0M
          float block_dc = dcs[y * 2 + x];
476
13.0M
          float residual_sum = 0;
477
65.4M
          for (size_t iy = 0; iy < 4; iy++) {
478
261M
            for (size_t ix = 0; ix < 4; ix++) {
479
209M
              if (ix == 0 && iy == 0) continue;
480
196M
              residual_sum += coefficients[(y + iy * 2) * 8 + x + ix * 2];
481
196M
            }
482
52.3M
          }
483
13.0M
          pixels[(4 * y + 1) * pixels_stride + 4 * x + 1] =
484
13.0M
              block_dc - residual_sum * (1.0f / 16);
485
65.4M
          for (size_t iy = 0; iy < 4; iy++) {
486
261M
            for (size_t ix = 0; ix < 4; ix++) {
487
209M
              if (ix == 1 && iy == 1) continue;
488
196M
              pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] =
489
196M
                  coefficients[(y + iy * 2) * 8 + x + ix * 2] +
490
196M
                  pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
491
196M
            }
492
52.3M
          }
493
13.0M
          pixels[y * 4 * pixels_stride + x * 4] =
494
13.0M
              coefficients[(y + 2) * 8 + x + 2] +
495
13.0M
              pixels[(4 * y + 1) * pixels_stride + 4 * x + 1];
496
13.0M
        }
497
6.54M
      }
498
3.27M
      break;
499
0
    }
500
546k
    case Type::DCT8X4: {
501
546k
      float dcs[2] = {};
502
546k
      float block0 = coefficients[0];
503
546k
      float block1 = coefficients[8];
504
546k
      dcs[0] = block0 + block1;
505
546k
      dcs[1] = block0 - block1;
506
1.64M
      for (size_t x = 0; x < 2; x++) {
507
1.09M
        HWY_ALIGN float block[4 * 8];
508
1.09M
        block[0] = dcs[x];
509
5.46M
        for (size_t iy = 0; iy < 4; iy++) {
510
39.3M
          for (size_t ix = 0; ix < 8; ix++) {
511
34.9M
            if (ix == 0 && iy == 0) continue;
512
33.9M
            block[iy * 8 + ix] = coefficients[(x + iy * 2) * 8 + ix];
513
33.9M
          }
514
4.37M
        }
515
1.09M
        ComputeScaledIDCT<8, 4>()(block, DCTTo(pixels + x * 4, pixels_stride),
516
1.09M
                                  scratch_space);
517
1.09M
      }
518
546k
      break;
519
0
    }
520
235k
    case Type::DCT4X8: {
521
235k
      float dcs[2] = {};
522
235k
      float block0 = coefficients[0];
523
235k
      float block1 = coefficients[8];
524
235k
      dcs[0] = block0 + block1;
525
235k
      dcs[1] = block0 - block1;
526
705k
      for (size_t y = 0; y < 2; y++) {
527
470k
        HWY_ALIGN float block[4 * 8];
528
470k
        block[0] = dcs[y];
529
2.35M
        for (size_t iy = 0; iy < 4; iy++) {
530
16.9M
          for (size_t ix = 0; ix < 8; ix++) {
531
15.0M
            if (ix == 0 && iy == 0) continue;
532
14.5M
            block[iy * 8 + ix] = coefficients[(y + iy * 2) * 8 + ix];
533
14.5M
          }
534
1.88M
        }
535
470k
        ComputeScaledIDCT<4, 8>()(
536
470k
            block, DCTTo(pixels + y * 4 * pixels_stride, pixels_stride),
537
470k
            scratch_space);
538
470k
      }
539
235k
      break;
540
0
    }
541
2.68k
    case Type::DCT4X4: {
542
2.68k
      float dcs[4] = {};
543
2.68k
      float block00 = coefficients[0];
544
2.68k
      float block01 = coefficients[1];
545
2.68k
      float block10 = coefficients[8];
546
2.68k
      float block11 = coefficients[9];
547
2.68k
      dcs[0] = block00 + block01 + block10 + block11;
548
2.68k
      dcs[1] = block00 + block01 - block10 - block11;
549
2.68k
      dcs[2] = block00 - block01 + block10 - block11;
550
2.68k
      dcs[3] = block00 - block01 - block10 + block11;
551
8.05k
      for (size_t y = 0; y < 2; y++) {
552
16.1k
        for (size_t x = 0; x < 2; x++) {
553
10.7k
          HWY_ALIGN float block[4 * 4];
554
10.7k
          block[0] = dcs[y * 2 + x];
555
53.7k
          for (size_t iy = 0; iy < 4; iy++) {
556
214k
            for (size_t ix = 0; ix < 4; ix++) {
557
171k
              if (ix == 0 && iy == 0) continue;
558
161k
              block[iy * 4 + ix] = coefficients[(y + iy * 2) * 8 + x + ix * 2];
559
161k
            }
560
42.9k
          }
561
10.7k
          ComputeScaledIDCT<4, 4>()(
562
10.7k
              block,
563
10.7k
              DCTTo(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
564
10.7k
              scratch_space);
565
10.7k
        }
566
5.37k
      }
567
2.68k
      break;
568
0
    }
569
6.75M
    case Type::DCT2X2: {
570
6.75M
      HWY_ALIGN float coeffs[kDCTBlockSize];
571
6.75M
      memcpy(coeffs, coefficients, sizeof(float) * kDCTBlockSize);
572
6.75M
      IDCT2TopBlock<2>(coeffs, kBlockDim, coeffs);
573
6.75M
      IDCT2TopBlock<4>(coeffs, kBlockDim, coeffs);
574
6.75M
      IDCT2TopBlock<8>(coeffs, kBlockDim, coeffs);
575
60.8M
      for (size_t y = 0; y < kBlockDim; y++) {
576
486M
        for (size_t x = 0; x < kBlockDim; x++) {
577
432M
          pixels[y * pixels_stride + x] = coeffs[y * kBlockDim + x];
578
432M
        }
579
54.0M
      }
580
6.75M
      break;
581
0
    }
582
547k
    case Type::DCT16X16: {
583
547k
      ComputeScaledIDCT<16, 16>()(coefficients, DCTTo(pixels, pixels_stride),
584
547k
                                  scratch_space);
585
547k
      break;
586
0
    }
587
775k
    case Type::DCT16X8: {
588
775k
      ComputeScaledIDCT<16, 8>()(coefficients, DCTTo(pixels, pixels_stride),
589
775k
                                 scratch_space);
590
775k
      break;
591
0
    }
592
838k
    case Type::DCT8X16: {
593
838k
      ComputeScaledIDCT<8, 16>()(coefficients, DCTTo(pixels, pixels_stride),
594
838k
                                 scratch_space);
595
838k
      break;
596
0
    }
597
708
    case Type::DCT32X8: {
598
708
      ComputeScaledIDCT<32, 8>()(coefficients, DCTTo(pixels, pixels_stride),
599
708
                                 scratch_space);
600
708
      break;
601
0
    }
602
108
    case Type::DCT8X32: {
603
108
      ComputeScaledIDCT<8, 32>()(coefficients, DCTTo(pixels, pixels_stride),
604
108
                                 scratch_space);
605
108
      break;
606
0
    }
607
173k
    case Type::DCT32X16: {
608
173k
      ComputeScaledIDCT<32, 16>()(coefficients, DCTTo(pixels, pixels_stride),
609
173k
                                  scratch_space);
610
173k
      break;
611
0
    }
612
173k
    case Type::DCT16X32: {
613
173k
      ComputeScaledIDCT<16, 32>()(coefficients, DCTTo(pixels, pixels_stride),
614
173k
                                  scratch_space);
615
173k
      break;
616
0
    }
617
298k
    case Type::DCT32X32: {
618
298k
      ComputeScaledIDCT<32, 32>()(coefficients, DCTTo(pixels, pixels_stride),
619
298k
                                  scratch_space);
620
298k
      break;
621
0
    }
622
5.60M
    case Type::DCT: {
623
5.60M
      ComputeScaledIDCT<8, 8>()(coefficients, DCTTo(pixels, pixels_stride),
624
5.60M
                                scratch_space);
625
5.60M
      break;
626
0
    }
627
347k
    case Type::AFV0: {
628
347k
      AFVTransformToPixels<0>(coefficients, pixels, pixels_stride);
629
347k
      break;
630
0
    }
631
193k
    case Type::AFV1: {
632
193k
      AFVTransformToPixels<1>(coefficients, pixels, pixels_stride);
633
193k
      break;
634
0
    }
635
245k
    case Type::AFV2: {
636
245k
      AFVTransformToPixels<2>(coefficients, pixels, pixels_stride);
637
245k
      break;
638
0
    }
639
249k
    case Type::AFV3: {
640
249k
      AFVTransformToPixels<3>(coefficients, pixels, pixels_stride);
641
249k
      break;
642
0
    }
643
48.6k
    case Type::DCT64X32: {
644
48.6k
      ComputeScaledIDCT<64, 32>()(coefficients, DCTTo(pixels, pixels_stride),
645
48.6k
                                  scratch_space);
646
48.6k
      break;
647
0
    }
648
19.2k
    case Type::DCT32X64: {
649
19.2k
      ComputeScaledIDCT<32, 64>()(coefficients, DCTTo(pixels, pixels_stride),
650
19.2k
                                  scratch_space);
651
19.2k
      break;
652
0
    }
653
163k
    case Type::DCT64X64: {
654
163k
      ComputeScaledIDCT<64, 64>()(coefficients, DCTTo(pixels, pixels_stride),
655
163k
                                  scratch_space);
656
163k
      break;
657
0
    }
658
3
    case Type::DCT128X64: {
659
3
      ComputeScaledIDCT<128, 64>()(coefficients, DCTTo(pixels, pixels_stride),
660
3
                                   scratch_space);
661
3
      break;
662
0
    }
663
9
    case Type::DCT64X128: {
664
9
      ComputeScaledIDCT<64, 128>()(coefficients, DCTTo(pixels, pixels_stride),
665
9
                                   scratch_space);
666
9
      break;
667
0
    }
668
6
    case Type::DCT128X128: {
669
6
      ComputeScaledIDCT<128, 128>()(coefficients, DCTTo(pixels, pixels_stride),
670
6
                                    scratch_space);
671
6
      break;
672
0
    }
673
0
    case Type::DCT256X128: {
674
0
      ComputeScaledIDCT<256, 128>()(coefficients, DCTTo(pixels, pixels_stride),
675
0
                                    scratch_space);
676
0
      break;
677
0
    }
678
0
    case Type::DCT128X256: {
679
0
      ComputeScaledIDCT<128, 256>()(coefficients, DCTTo(pixels, pixels_stride),
680
0
                                    scratch_space);
681
0
      break;
682
0
    }
683
0
    case Type::DCT256X256: {
684
0
      ComputeScaledIDCT<256, 256>()(coefficients, DCTTo(pixels, pixels_stride),
685
0
                                    scratch_space);
686
0
      break;
687
0
    }
688
20.5M
  }
689
20.5M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformToPixels(jxl::AcStrategyType, float*, float*, unsigned long, float*)
690
691
// Writes into `llf` the lowest-frequency coefficients for a block of type
// `strategy`, computed from the values read at `dc` (row stride `dc_stride`).
//
// - Multi-block DCT strategies (DCT16X8 up to DCT256X256) forward to
//   ReinterpretingDCT, always with ROWS == LF_ROWS and COLS == LF_COLS, and
//   with an output stride of max(ROWS, COLS) * kBlockDim.
// - Every other strategy simply copies dc[0] to llf[0].
//
// `scratch` is only touched by the strategies from DCT64X32 upward: its first
// ROWS * COLS floats are handed over as the `block` buffer and the remainder
// as `scratch_space`. Smaller strategies use the local stack buffers instead.
// NOTE(review): the minimum size required of `scratch` is not visible in this
// function -- confirm against ComputeScaledDCT and the callers.
HWY_MAYBE_UNUSED void LowestFrequenciesFromDC(const AcStrategyType strategy,
692
                                              const float* dc, size_t dc_stride,
693
                                              float* llf,
694
20.7M
                                              float* JXL_RESTRICT scratch) {
695
20.7M
  using Type = AcStrategyType;
696
20.7M
  // Stack buffers passed as block/scratch_space by every case up to 4x4
  // (DCT16X8 .. DCT32X32), so those cases never use the caller's `scratch`.
  HWY_ALIGN float warm_block[4 * 4];
697
20.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
20.7M
  switch (strategy) {
699
775k
    case Type::DCT16X8: {
700
775k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
775k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
775k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
775k
      break;
704
0
    }
705
838k
    case Type::DCT8X16: {
706
838k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
838k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
838k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
838k
      break;
710
0
    }
711
547k
    case Type::DCT16X16: {
712
547k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
547k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
547k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
547k
      break;
716
0
    }
717
708
    case Type::DCT32X8: {
718
708
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
708
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
708
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
708
      break;
722
0
    }
723
108
    case Type::DCT8X32: {
724
108
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
108
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
108
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
108
      break;
728
0
    }
729
173k
    case Type::DCT32X16: {
730
173k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
173k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
173k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
173k
      break;
734
0
    }
735
173k
    case Type::DCT16X32: {
736
173k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
173k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
173k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
173k
      break;
740
0
    }
741
298k
    case Type::DCT32X32: {
742
298k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
298k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
298k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
298k
      break;
746
0
    }
747
48.6k
    // From here on ROWS * COLS exceeds the 4 * 4 local buffer, so the
    // caller-provided `scratch` is split instead: [0, ROWS * COLS) is the
    // block buffer, everything after it is the scratch space.
    case Type::DCT64X32: {
748
48.6k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
48.6k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
48.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
48.6k
      break;
752
0
    }
753
19.2k
    case Type::DCT32X64: {
754
19.2k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.2k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.2k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.2k
      break;
758
0
    }
759
163k
    case Type::DCT64X64: {
760
163k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
163k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
163k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
163k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.64M
    // All remaining strategies share one body: a single value is copied
    // straight through, with no transform and no scratch usage.
    case Type::DCT:
806
12.4M
    case Type::DCT2X2:
807
12.4M
    case Type::DCT4X4:
808
12.6M
    case Type::DCT4X8:
809
13.1M
    case Type::DCT8X4:
810
13.5M
    case Type::AFV0:
811
13.7M
    case Type::AFV1:
812
13.9M
    case Type::AFV2:
813
14.2M
    case Type::AFV3:
814
17.7M
    case Type::IDENTITY:
815
17.7M
      llf[0] = dc[0];
816
17.7M
      break;
817
20.7M
  // NOTE(review): the ';' after the closing brace below is an empty statement.
  };
818
20.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: dec_group.cc:jxl::N_SSE4::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
dec_group.cc:jxl::N_AVX2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
694
20.7M
                                              float* JXL_RESTRICT scratch) {
695
20.7M
  using Type = AcStrategyType;
696
20.7M
  HWY_ALIGN float warm_block[4 * 4];
697
20.7M
  HWY_ALIGN float warm_scratch_space[4 * 4 * 4];
698
20.7M
  switch (strategy) {
699
775k
    case Type::DCT16X8: {
700
775k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
701
775k
                        /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
702
775k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
703
775k
      break;
704
0
    }
705
838k
    case Type::DCT8X16: {
706
838k
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
707
838k
                        /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
708
838k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
709
838k
      break;
710
0
    }
711
547k
    case Type::DCT16X16: {
712
547k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
713
547k
                        /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
714
547k
          dc, dc_stride, llf, 2 * kBlockDim, warm_block, warm_scratch_space);
715
547k
      break;
716
0
    }
717
708
    case Type::DCT32X8: {
718
708
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
719
708
                        /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
720
708
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
721
708
      break;
722
0
    }
723
108
    case Type::DCT8X32: {
724
108
      ReinterpretingDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
725
108
                        /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
726
108
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
727
108
      break;
728
0
    }
729
173k
    case Type::DCT32X16: {
730
173k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
731
173k
                        /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
732
173k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
733
173k
      break;
734
0
    }
735
173k
    case Type::DCT16X32: {
736
173k
      ReinterpretingDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
737
173k
                        /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
738
173k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
739
173k
      break;
740
0
    }
741
298k
    case Type::DCT32X32: {
742
298k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
743
298k
                        /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
744
298k
          dc, dc_stride, llf, 4 * kBlockDim, warm_block, warm_scratch_space);
745
298k
      break;
746
0
    }
747
48.6k
    case Type::DCT64X32: {
748
48.6k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
749
48.6k
                        /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
750
48.6k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 4);
751
48.6k
      break;
752
0
    }
753
19.2k
    case Type::DCT32X64: {
754
19.2k
      ReinterpretingDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
755
19.2k
                        /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
756
19.2k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 4 * 8);
757
19.2k
      break;
758
0
    }
759
163k
    case Type::DCT64X64: {
760
163k
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
761
163k
                        /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
762
163k
          dc, dc_stride, llf, 8 * kBlockDim, scratch, scratch + 8 * 8);
763
163k
      break;
764
0
    }
765
3
    case Type::DCT128X64: {
766
3
      ReinterpretingDCT</*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
767
3
                        /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
768
3
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 8);
769
3
      break;
770
0
    }
771
9
    case Type::DCT64X128: {
772
9
      ReinterpretingDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
773
9
                        /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
774
9
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 8 * 16);
775
9
      break;
776
0
    }
777
6
    case Type::DCT128X128: {
778
6
      ReinterpretingDCT<
779
6
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
780
6
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
781
6
          dc, dc_stride, llf, 16 * kBlockDim, scratch, scratch + 16 * 16);
782
6
      break;
783
0
    }
784
0
    case Type::DCT256X128: {
785
0
      ReinterpretingDCT<
786
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
787
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
788
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 16);
789
0
      break;
790
0
    }
791
0
    case Type::DCT128X256: {
792
0
      ReinterpretingDCT<
793
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
794
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
795
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 16 * 32);
796
0
      break;
797
0
    }
798
0
    case Type::DCT256X256: {
799
0
      ReinterpretingDCT<
800
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
801
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
802
0
          dc, dc_stride, llf, 32 * kBlockDim, scratch, scratch + 32 * 32);
803
0
      break;
804
0
    }
805
5.64M
    case Type::DCT:
806
12.4M
    case Type::DCT2X2:
807
12.4M
    case Type::DCT4X4:
808
12.6M
    case Type::DCT4X8:
809
13.1M
    case Type::DCT8X4:
810
13.5M
    case Type::AFV0:
811
13.7M
    case Type::AFV1:
812
13.9M
    case Type::AFV2:
813
14.2M
    case Type::AFV3:
814
17.7M
    case Type::IDENTITY:
815
17.7M
      llf[0] = dc[0];
816
17.7M
      break;
817
20.7M
  };
818
20.7M
}
Unexecuted instantiation: dec_group.cc:jxl::N_SSE2::(anonymous namespace)::LowestFrequenciesFromDC(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
819
820
}  // namespace
821
// NOLINTNEXTLINE(google-readability-namespace-comments)
822
}  // namespace HWY_NAMESPACE
823
}  // namespace jxl
824
HWY_AFTER_NAMESPACE();
825
826
#endif  // LIB_JXL_DEC_TRANSFORMS_INL_H_