Coverage Report

Created: 2026-03-31 06:56

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.79M
                                   const size_t output_stride, float* scratch) {
40
1.79M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.79M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.79M
  float* block = scratch;
43
1.79M
  if (ROWS < COLS) {
44
1.35M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
2.96M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.21M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.21M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.21M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.21M
      }
50
750k
    }
51
1.19M
  } else {
52
4.11M
    for (size_t y = 0; y < LF_COLS; y++) {
53
15.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
12.9M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
12.9M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
12.9M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
12.9M
      }
58
2.91M
    }
59
1.19M
  }
60
61
1.79M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.79M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.79M
                                  scratch_space);
64
1.79M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
227k
                                   const size_t output_stride, float* scratch) {
40
227k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
227k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
227k
  float* block = scratch;
43
227k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
227k
  } else {
52
455k
    for (size_t y = 0; y < LF_COLS; y++) {
53
682k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
455k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
455k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
455k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
455k
      }
58
227k
    }
59
227k
  }
60
61
227k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
227k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
227k
                                  scratch_space);
64
227k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
244k
                                   const size_t output_stride, float* scratch) {
40
244k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
244k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
244k
  float* block = scratch;
43
244k
  if (ROWS < COLS) {
44
489k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
734k
      for (size_t x = 0; x < LF_COLS; x++) {
46
489k
        block[y * COLS + x] = input[y * input_stride + x] *
47
489k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
489k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
489k
      }
50
244k
    }
51
244k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
244k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
244k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
244k
                                  scratch_space);
64
244k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
166k
                                   const size_t output_stride, float* scratch) {
40
166k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
166k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
166k
  float* block = scratch;
43
166k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
166k
  } else {
52
498k
    for (size_t y = 0; y < LF_COLS; y++) {
53
997k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
665k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
665k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
665k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
665k
      }
58
332k
    }
59
166k
  }
60
61
166k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
166k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
166k
                                  scratch_space);
64
166k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
51.3k
                                   const size_t output_stride, float* scratch) {
40
51.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
51.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
51.3k
  float* block = scratch;
43
51.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
51.3k
  } else {
52
153k
    for (size_t y = 0; y < LF_COLS; y++) {
53
513k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
410k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
410k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
410k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
410k
      }
58
102k
    }
59
51.3k
  }
60
61
51.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
51.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
51.3k
                                  scratch_space);
64
51.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
52.6k
                                   const size_t output_stride, float* scratch) {
40
52.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
52.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
52.6k
  float* block = scratch;
43
52.6k
  if (ROWS < COLS) {
44
158k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
526k
      for (size_t x = 0; x < LF_COLS; x++) {
46
421k
        block[y * COLS + x] = input[y * input_stride + x] *
47
421k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
421k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
421k
      }
50
105k
    }
51
52.6k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
52.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
52.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
52.6k
                                  scratch_space);
64
52.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
90.3k
                                   const size_t output_stride, float* scratch) {
40
90.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
90.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
90.3k
  float* block = scratch;
43
90.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
90.3k
  } else {
52
451k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.80M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.44M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.44M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.44M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.44M
      }
58
361k
    }
59
90.3k
  }
60
61
90.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
90.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
90.3k
                                  scratch_space);
64
90.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.8k
                                   const size_t output_stride, float* scratch) {
40
11.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.8k
  float* block = scratch;
43
11.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
11.8k
  } else {
52
59.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
425k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
378k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
378k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
378k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
378k
      }
58
47.2k
    }
59
11.8k
  }
60
61
11.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.8k
                                  scratch_space);
64
11.8k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.19k
                                   const size_t output_stride, float* scratch) {
40
6.19k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.19k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.19k
  float* block = scratch;
43
6.19k
  if (ROWS < COLS) {
44
30.9k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
223k
      for (size_t x = 0; x < LF_COLS; x++) {
46
198k
        block[y * COLS + x] = input[y * input_stride + x] *
47
198k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
198k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
198k
      }
50
24.7k
    }
51
6.19k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.19k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.19k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.19k
                                  scratch_space);
64
6.19k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
48.5k
                                   const size_t output_stride, float* scratch) {
40
48.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
48.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
48.5k
  float* block = scratch;
43
48.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
48.5k
  } else {
52
436k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.49M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.10M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.10M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.10M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.10M
      }
58
388k
    }
59
48.5k
  }
60
61
48.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
48.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
48.5k
                                  scratch_space);
64
48.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
227k
                                   const size_t output_stride, float* scratch) {
40
227k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
227k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
227k
  float* block = scratch;
43
227k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
227k
  } else {
52
455k
    for (size_t y = 0; y < LF_COLS; y++) {
53
682k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
455k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
455k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
455k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
455k
      }
58
227k
    }
59
227k
  }
60
61
227k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
227k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
227k
                                  scratch_space);
64
227k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
244k
                                   const size_t output_stride, float* scratch) {
40
244k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
244k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
244k
  float* block = scratch;
43
244k
  if (ROWS < COLS) {
44
489k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
734k
      for (size_t x = 0; x < LF_COLS; x++) {
46
489k
        block[y * COLS + x] = input[y * input_stride + x] *
47
489k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
489k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
489k
      }
50
244k
    }
51
244k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
244k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
244k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
244k
                                  scratch_space);
64
244k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
166k
                                   const size_t output_stride, float* scratch) {
40
166k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
166k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
166k
  float* block = scratch;
43
166k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
166k
  } else {
52
498k
    for (size_t y = 0; y < LF_COLS; y++) {
53
997k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
665k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
665k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
665k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
665k
      }
58
332k
    }
59
166k
  }
60
61
166k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
166k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
166k
                                  scratch_space);
64
166k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
51.3k
                                   const size_t output_stride, float* scratch) {
40
51.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
51.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
51.3k
  float* block = scratch;
43
51.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
51.3k
  } else {
52
153k
    for (size_t y = 0; y < LF_COLS; y++) {
53
513k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
410k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
410k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
410k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
410k
      }
58
102k
    }
59
51.3k
  }
60
61
51.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
51.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
51.3k
                                  scratch_space);
64
51.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
52.6k
                                   const size_t output_stride, float* scratch) {
40
52.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
52.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
52.6k
  float* block = scratch;
43
52.6k
  if (ROWS < COLS) {
44
158k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
526k
      for (size_t x = 0; x < LF_COLS; x++) {
46
421k
        block[y * COLS + x] = input[y * input_stride + x] *
47
421k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
421k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
421k
      }
50
105k
    }
51
52.6k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
52.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
52.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
52.6k
                                  scratch_space);
64
52.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
90.3k
                                   const size_t output_stride, float* scratch) {
40
90.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
90.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
90.3k
  float* block = scratch;
43
90.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
90.3k
  } else {
52
451k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.80M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.44M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.44M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.44M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.44M
      }
58
361k
    }
59
90.3k
  }
60
61
90.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
90.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
90.3k
                                  scratch_space);
64
90.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.8k
                                   const size_t output_stride, float* scratch) {
40
11.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.8k
  float* block = scratch;
43
11.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
11.8k
  } else {
52
59.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
425k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
378k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
378k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
378k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
378k
      }
58
47.2k
    }
59
11.8k
  }
60
61
11.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.8k
                                  scratch_space);
64
11.8k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.19k
                                   const size_t output_stride, float* scratch) {
40
6.19k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.19k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.19k
  float* block = scratch;
43
6.19k
  if (ROWS < COLS) {
44
30.9k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
223k
      for (size_t x = 0; x < LF_COLS; x++) {
46
198k
        block[y * COLS + x] = input[y * input_stride + x] *
47
198k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
198k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
198k
      }
50
24.7k
    }
51
6.19k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.19k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.19k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.19k
                                  scratch_space);
64
6.19k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
48.5k
                                   const size_t output_stride, float* scratch) {
40
48.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
48.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
48.5k
  float* block = scratch;
43
48.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
48.5k
  } else {
52
436k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.49M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.10M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.10M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.10M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.10M
      }
58
388k
    }
59
48.5k
  }
60
61
48.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
48.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
48.5k
                                  scratch_space);
64
48.5k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
49.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
49.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
49.4M
  static_assert(S % 2 == 0, "S should be even");
70
49.4M
  float temp[kDCTBlockSize];
71
49.4M
  constexpr size_t num_2x2 = S / 2;
72
164M
  for (size_t y = 0; y < num_2x2; y++) {
73
461M
    for (size_t x = 0; x < num_2x2; x++) {
74
345M
      float c00 = block[y * 2 * stride + x * 2];
75
345M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
345M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
345M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
345M
      float r00 = c00 + c01 + c10 + c11;
79
345M
      float r01 = c00 + c01 - c10 - c11;
80
345M
      float r10 = c00 - c01 + c10 - c11;
81
345M
      float r11 = c00 - c01 - c10 + c11;
82
345M
      r00 *= 0.25f;
83
345M
      r01 *= 0.25f;
84
345M
      r10 *= 0.25f;
85
345M
      r11 *= 0.25f;
86
345M
      temp[y * kBlockDim + x] = r00;
87
345M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
345M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
345M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
345M
    }
91
115M
  }
92
280M
  for (size_t y = 0; y < S; y++) {
93
1.61G
    for (size_t x = 0; x < S; x++) {
94
1.38G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.38G
    }
96
230M
  }
97
49.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
10.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
40.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
32.7M
      float c00 = block[y * 2 * stride + x * 2];
75
32.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
32.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
32.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
32.7M
      float r00 = c00 + c01 + c10 + c11;
79
32.7M
      float r01 = c00 + c01 - c10 - c11;
80
32.7M
      float r10 = c00 - c01 + c10 - c11;
81
32.7M
      float r11 = c00 - c01 - c10 + c11;
82
32.7M
      r00 *= 0.25f;
83
32.7M
      r01 *= 0.25f;
84
32.7M
      r10 *= 0.25f;
85
32.7M
      r11 *= 0.25f;
86
32.7M
      temp[y * kBlockDim + x] = r00;
87
32.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
32.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
32.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
32.7M
    }
91
8.17M
  }
92
18.3M
  for (size_t y = 0; y < S; y++) {
93
147M
    for (size_t x = 0; x < S; x++) {
94
130M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
130M
    }
96
16.3M
  }
97
2.04M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
6.13M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.17M
      float c00 = block[y * 2 * stride + x * 2];
75
8.17M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
8.17M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
8.17M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
8.17M
      float r00 = c00 + c01 + c10 + c11;
79
8.17M
      float r01 = c00 + c01 - c10 - c11;
80
8.17M
      float r10 = c00 - c01 + c10 - c11;
81
8.17M
      float r11 = c00 - c01 - c10 + c11;
82
8.17M
      r00 *= 0.25f;
83
8.17M
      r01 *= 0.25f;
84
8.17M
      r10 *= 0.25f;
85
8.17M
      r11 *= 0.25f;
86
8.17M
      temp[y * kBlockDim + x] = r00;
87
8.17M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
8.17M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
8.17M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
8.17M
    }
91
4.08M
  }
92
10.2M
  for (size_t y = 0; y < S; y++) {
93
40.8M
    for (size_t x = 0; x < S; x++) {
94
32.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
32.7M
    }
96
8.17M
  }
97
2.04M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
4.08M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.08M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.04M
      float c00 = block[y * 2 * stride + x * 2];
75
2.04M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.04M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.04M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.04M
      float r00 = c00 + c01 + c10 + c11;
79
2.04M
      float r01 = c00 + c01 - c10 - c11;
80
2.04M
      float r10 = c00 - c01 + c10 - c11;
81
2.04M
      float r11 = c00 - c01 - c10 + c11;
82
2.04M
      r00 *= 0.25f;
83
2.04M
      r01 *= 0.25f;
84
2.04M
      r10 *= 0.25f;
85
2.04M
      r11 *= 0.25f;
86
2.04M
      temp[y * kBlockDim + x] = r00;
87
2.04M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.04M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.04M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.04M
    }
91
2.04M
  }
92
6.13M
  for (size_t y = 0; y < S; y++) {
93
12.2M
    for (size_t x = 0; x < S; x++) {
94
8.17M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.17M
    }
96
4.08M
  }
97
2.04M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
10.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
40.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
32.7M
      float c00 = block[y * 2 * stride + x * 2];
75
32.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
32.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
32.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
32.7M
      float r00 = c00 + c01 + c10 + c11;
79
32.7M
      float r01 = c00 + c01 - c10 - c11;
80
32.7M
      float r10 = c00 - c01 + c10 - c11;
81
32.7M
      float r11 = c00 - c01 - c10 + c11;
82
32.7M
      r00 *= 0.25f;
83
32.7M
      r01 *= 0.25f;
84
32.7M
      r10 *= 0.25f;
85
32.7M
      r11 *= 0.25f;
86
32.7M
      temp[y * kBlockDim + x] = r00;
87
32.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
32.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
32.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
32.7M
    }
91
8.17M
  }
92
18.3M
  for (size_t y = 0; y < S; y++) {
93
147M
    for (size_t x = 0; x < S; x++) {
94
130M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
130M
    }
96
16.3M
  }
97
2.04M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
6.13M
  for (size_t y = 0; y < num_2x2; y++) {
73
12.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.17M
      float c00 = block[y * 2 * stride + x * 2];
75
8.17M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
8.17M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
8.17M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
8.17M
      float r00 = c00 + c01 + c10 + c11;
79
8.17M
      float r01 = c00 + c01 - c10 - c11;
80
8.17M
      float r10 = c00 - c01 + c10 - c11;
81
8.17M
      float r11 = c00 - c01 - c10 + c11;
82
8.17M
      r00 *= 0.25f;
83
8.17M
      r01 *= 0.25f;
84
8.17M
      r10 *= 0.25f;
85
8.17M
      r11 *= 0.25f;
86
8.17M
      temp[y * kBlockDim + x] = r00;
87
8.17M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
8.17M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
8.17M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
8.17M
    }
91
4.08M
  }
92
10.2M
  for (size_t y = 0; y < S; y++) {
93
40.8M
    for (size_t x = 0; x < S; x++) {
94
32.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
32.7M
    }
96
8.17M
  }
97
2.04M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.04M
  static_assert(S % 2 == 0, "S should be even");
70
2.04M
  float temp[kDCTBlockSize];
71
2.04M
  constexpr size_t num_2x2 = S / 2;
72
4.08M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.08M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.04M
      float c00 = block[y * 2 * stride + x * 2];
75
2.04M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.04M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.04M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.04M
      float r00 = c00 + c01 + c10 + c11;
79
2.04M
      float r01 = c00 + c01 - c10 - c11;
80
2.04M
      float r10 = c00 - c01 + c10 - c11;
81
2.04M
      float r11 = c00 - c01 - c10 + c11;
82
2.04M
      r00 *= 0.25f;
83
2.04M
      r01 *= 0.25f;
84
2.04M
      r10 *= 0.25f;
85
2.04M
      r11 *= 0.25f;
86
2.04M
      temp[y * kBlockDim + x] = r00;
87
2.04M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.04M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.04M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.04M
    }
91
2.04M
  }
92
6.13M
  for (size_t y = 0; y < S; y++) {
93
12.2M
    for (size_t x = 0; x < S; x++) {
94
8.17M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.17M
    }
96
4.08M
  }
97
2.04M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
61.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
247M
    for (size_t x = 0; x < num_2x2; x++) {
74
198M
      float c00 = block[y * 2 * stride + x * 2];
75
198M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
198M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
198M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
198M
      float r00 = c00 + c01 + c10 + c11;
79
198M
      float r01 = c00 + c01 - c10 - c11;
80
198M
      float r10 = c00 - c01 + c10 - c11;
81
198M
      float r11 = c00 - c01 - c10 + c11;
82
198M
      r00 *= 0.25f;
83
198M
      r01 *= 0.25f;
84
198M
      r10 *= 0.25f;
85
198M
      r11 *= 0.25f;
86
198M
      temp[y * kBlockDim + x] = r00;
87
198M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
198M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
198M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
198M
    }
91
49.5M
  }
92
111M
  for (size_t y = 0; y < S; y++) {
93
891M
    for (size_t x = 0; x < S; x++) {
94
792M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
792M
    }
96
99.1M
  }
97
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
37.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
74.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
49.5M
      float c00 = block[y * 2 * stride + x * 2];
75
49.5M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
49.5M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
49.5M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
49.5M
      float r00 = c00 + c01 + c10 + c11;
79
49.5M
      float r01 = c00 + c01 - c10 - c11;
80
49.5M
      float r10 = c00 - c01 + c10 - c11;
81
49.5M
      float r11 = c00 - c01 - c10 + c11;
82
49.5M
      r00 *= 0.25f;
83
49.5M
      r01 *= 0.25f;
84
49.5M
      r10 *= 0.25f;
85
49.5M
      r11 *= 0.25f;
86
49.5M
      temp[y * kBlockDim + x] = r00;
87
49.5M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
49.5M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
49.5M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
49.5M
    }
91
24.7M
  }
92
61.9M
  for (size_t y = 0; y < S; y++) {
93
247M
    for (size_t x = 0; x < S; x++) {
94
198M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
198M
    }
96
49.5M
  }
97
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
12.3M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
12.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
12.3M
  static_assert(S % 2 == 0, "S should be even");
70
12.3M
  float temp[kDCTBlockSize];
71
12.3M
  constexpr size_t num_2x2 = S / 2;
72
24.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
24.7M
    for (size_t x = 0; x < num_2x2; x++) {
74
12.3M
      float c00 = block[y * 2 * stride + x * 2];
75
12.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
12.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
12.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
12.3M
      float r00 = c00 + c01 + c10 + c11;
79
12.3M
      float r01 = c00 + c01 - c10 - c11;
80
12.3M
      float r10 = c00 - c01 + c10 - c11;
81
12.3M
      float r11 = c00 - c01 - c10 + c11;
82
12.3M
      r00 *= 0.25f;
83
12.3M
      r01 *= 0.25f;
84
12.3M
      r10 *= 0.25f;
85
12.3M
      r11 *= 0.25f;
86
12.3M
      temp[y * kBlockDim + x] = r00;
87
12.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
12.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
12.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
12.3M
    }
91
12.3M
  }
92
37.1M
  for (size_t y = 0; y < S; y++) {
93
74.3M
    for (size_t x = 0; x < S; x++) {
94
49.5M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
49.5M
    }
96
24.7M
  }
97
12.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
50.1M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
50.1M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
50.1M
      {
102
50.1M
          0.2500000000000000,
103
50.1M
          0.8769029297991420f,
104
50.1M
          0.0000000000000000,
105
50.1M
          0.0000000000000000,
106
50.1M
          0.0000000000000000,
107
50.1M
          -0.4105377591765233f,
108
50.1M
          0.0000000000000000,
109
50.1M
          0.0000000000000000,
110
50.1M
          0.0000000000000000,
111
50.1M
          0.0000000000000000,
112
50.1M
          0.0000000000000000,
113
50.1M
          0.0000000000000000,
114
50.1M
          0.0000000000000000,
115
50.1M
          0.0000000000000000,
116
50.1M
          0.0000000000000000,
117
50.1M
          0.0000000000000000,
118
50.1M
      },
119
50.1M
      {
120
50.1M
          0.2500000000000000,
121
50.1M
          0.2206518106944235f,
122
50.1M
          0.0000000000000000,
123
50.1M
          0.0000000000000000,
124
50.1M
          -0.7071067811865474f,
125
50.1M
          0.6235485373547691f,
126
50.1M
          0.0000000000000000,
127
50.1M
          0.0000000000000000,
128
50.1M
          0.0000000000000000,
129
50.1M
          0.0000000000000000,
130
50.1M
          0.0000000000000000,
131
50.1M
          0.0000000000000000,
132
50.1M
          0.0000000000000000,
133
50.1M
          0.0000000000000000,
134
50.1M
          0.0000000000000000,
135
50.1M
          0.0000000000000000,
136
50.1M
      },
137
50.1M
      {
138
50.1M
          0.2500000000000000,
139
50.1M
          -0.1014005039375376f,
140
50.1M
          0.4067007583026075f,
141
50.1M
          -0.2125574805828875f,
142
50.1M
          0.0000000000000000,
143
50.1M
          -0.0643507165794627f,
144
50.1M
          -0.4517556589999482f,
145
50.1M
          -0.3046847507248690f,
146
50.1M
          0.3017929516615495f,
147
50.1M
          0.4082482904638627f,
148
50.1M
          0.1747866975480809f,
149
50.1M
          -0.2110560104933578f,
150
50.1M
          -0.1426608480880726f,
151
50.1M
          -0.1381354035075859f,
152
50.1M
          -0.1743760259965107f,
153
50.1M
          0.1135498731499434f,
154
50.1M
      },
155
50.1M
      {
156
50.1M
          0.2500000000000000,
157
50.1M
          -0.1014005039375375f,
158
50.1M
          0.4444481661973445f,
159
50.1M
          0.3085497062849767f,
160
50.1M
          0.0000000000000000f,
161
50.1M
          -0.0643507165794627f,
162
50.1M
          0.1585450355184006f,
163
50.1M
          0.5112616136591823f,
164
50.1M
          0.2579236279634118f,
165
50.1M
          0.0000000000000000,
166
50.1M
          0.0812611176717539f,
167
50.1M
          0.1856718091610980f,
168
50.1M
          -0.3416446842253372f,
169
50.1M
          0.3302282550303788f,
170
50.1M
          0.0702790691196284f,
171
50.1M
          -0.0741750459581035f,
172
50.1M
      },
173
50.1M
      {
174
50.1M
          0.2500000000000000,
175
50.1M
          0.2206518106944236f,
176
50.1M
          0.0000000000000000,
177
50.1M
          0.0000000000000000,
178
50.1M
          0.7071067811865476f,
179
50.1M
          0.6235485373547694f,
180
50.1M
          0.0000000000000000,
181
50.1M
          0.0000000000000000,
182
50.1M
          0.0000000000000000,
183
50.1M
          0.0000000000000000,
184
50.1M
          0.0000000000000000,
185
50.1M
          0.0000000000000000,
186
50.1M
          0.0000000000000000,
187
50.1M
          0.0000000000000000,
188
50.1M
          0.0000000000000000,
189
50.1M
          0.0000000000000000,
190
50.1M
      },
191
50.1M
      {
192
50.1M
          0.2500000000000000,
193
50.1M
          -0.1014005039375378f,
194
50.1M
          0.0000000000000000,
195
50.1M
          0.4706702258572536f,
196
50.1M
          0.0000000000000000,
197
50.1M
          -0.0643507165794628f,
198
50.1M
          -0.0403851516082220f,
199
50.1M
          0.0000000000000000,
200
50.1M
          0.1627234014286620f,
201
50.1M
          0.0000000000000000,
202
50.1M
          0.0000000000000000,
203
50.1M
          0.0000000000000000,
204
50.1M
          0.7367497537172237f,
205
50.1M
          0.0875511500058708f,
206
50.1M
          -0.2921026642334881f,
207
50.1M
          0.1940289303259434f,
208
50.1M
      },
209
50.1M
      {
210
50.1M
          0.2500000000000000,
211
50.1M
          -0.1014005039375377f,
212
50.1M
          0.1957439937204294f,
213
50.1M
          -0.1621205195722993f,
214
50.1M
          0.0000000000000000,
215
50.1M
          -0.0643507165794628f,
216
50.1M
          0.0074182263792424f,
217
50.1M
          -0.2904801297289980f,
218
50.1M
          0.0952002265347504f,
219
50.1M
          0.0000000000000000,
220
50.1M
          -0.3675398009862027f,
221
50.1M
          0.4921585901373873f,
222
50.1M
          0.2462710772207515f,
223
50.1M
          -0.0794670660590957f,
224
50.1M
          0.3623817333531167f,
225
50.1M
          -0.4351904965232280f,
226
50.1M
      },
227
50.1M
      {
228
50.1M
          0.2500000000000000,
229
50.1M
          -0.1014005039375376f,
230
50.1M
          0.2929100136981264f,
231
50.1M
          0.0000000000000000,
232
50.1M
          0.0000000000000000,
233
50.1M
          -0.0643507165794627f,
234
50.1M
          0.3935103426921017f,
235
50.1M
          -0.0657870154914280f,
236
50.1M
          0.0000000000000000,
237
50.1M
          -0.4082482904638628f,
238
50.1M
          -0.3078822139579090f,
239
50.1M
          -0.3852501370925192f,
240
50.1M
          -0.0857401903551931f,
241
50.1M
          -0.4613374887461511f,
242
50.1M
          0.0000000000000000,
243
50.1M
          0.2191868483885747f,
244
50.1M
      },
245
50.1M
      {
246
50.1M
          0.2500000000000000,
247
50.1M
          -0.1014005039375376f,
248
50.1M
          -0.4067007583026072f,
249
50.1M
          -0.2125574805828705f,
250
50.1M
          0.0000000000000000,
251
50.1M
          -0.0643507165794627f,
252
50.1M
          -0.4517556589999464f,
253
50.1M
          0.3046847507248840f,
254
50.1M
          0.3017929516615503f,
255
50.1M
          -0.4082482904638635f,
256
50.1M
          -0.1747866975480813f,
257
50.1M
          0.2110560104933581f,
258
50.1M
          -0.1426608480880734f,
259
50.1M
          -0.1381354035075829f,
260
50.1M
          -0.1743760259965108f,
261
50.1M
          0.1135498731499426f,
262
50.1M
      },
263
50.1M
      {
264
50.1M
          0.2500000000000000,
265
50.1M
          -0.1014005039375377f,
266
50.1M
          -0.1957439937204287f,
267
50.1M
          -0.1621205195722833f,
268
50.1M
          0.0000000000000000,
269
50.1M
          -0.0643507165794628f,
270
50.1M
          0.0074182263792444f,
271
50.1M
          0.2904801297290076f,
272
50.1M
          0.0952002265347505f,
273
50.1M
          0.0000000000000000,
274
50.1M
          0.3675398009862011f,
275
50.1M
          -0.4921585901373891f,
276
50.1M
          0.2462710772207514f,
277
50.1M
          -0.0794670660591026f,
278
50.1M
          0.3623817333531165f,
279
50.1M
          -0.4351904965232251f,
280
50.1M
      },
281
50.1M
      {
282
50.1M
          0.2500000000000000,
283
50.1M
          -0.1014005039375375f,
284
50.1M
          0.0000000000000000,
285
50.1M
          -0.4706702258572528f,
286
50.1M
          0.0000000000000000,
287
50.1M
          -0.0643507165794627f,
288
50.1M
          0.1107416575309343f,
289
50.1M
          0.0000000000000000,
290
50.1M
          -0.1627234014286617f,
291
50.1M
          0.0000000000000000,
292
50.1M
          0.0000000000000000,
293
50.1M
          0.0000000000000000,
294
50.1M
          0.1488339922711357f,
295
50.1M
          0.4972464710953509f,
296
50.1M
          0.2921026642334879f,
297
50.1M
          0.5550443808910661f,
298
50.1M
      },
299
50.1M
      {
300
50.1M
          0.2500000000000000,
301
50.1M
          -0.1014005039375377f,
302
50.1M
          0.1137907446044809f,
303
50.1M
          -0.1464291867126764f,
304
50.1M
          0.0000000000000000,
305
50.1M
          -0.0643507165794628f,
306
50.1M
          0.0829816309488205f,
307
50.1M
          -0.2388977352334460f,
308
50.1M
          -0.3531238544981630f,
309
50.1M
          -0.4082482904638630f,
310
50.1M
          0.4826689115059883f,
311
50.1M
          0.1741941265991622f,
312
50.1M
          -0.0476868035022925f,
313
50.1M
          0.1253805944856366f,
314
50.1M
          -0.4326608024727445f,
315
50.1M
          -0.2546827712406646f,
316
50.1M
      },
317
50.1M
      {
318
50.1M
          0.2500000000000000,
319
50.1M
          -0.1014005039375377f,
320
50.1M
          -0.4444481661973438f,
321
50.1M
          0.3085497062849487f,
322
50.1M
          0.0000000000000000,
323
50.1M
          -0.0643507165794628f,
324
50.1M
          0.1585450355183970f,
325
50.1M
          -0.5112616136592012f,
326
50.1M
          0.2579236279634129f,
327
50.1M
          0.0000000000000000,
328
50.1M
          -0.0812611176717504f,
329
50.1M
          -0.1856718091610990f,
330
50.1M
          -0.3416446842253373f,
331
50.1M
          0.3302282550303805f,
332
50.1M
          0.0702790691196282f,
333
50.1M
          -0.0741750459581023f,
334
50.1M
      },
335
50.1M
      {
336
50.1M
          0.2500000000000000,
337
50.1M
          -0.1014005039375376f,
338
50.1M
          -0.2929100136981264f,
339
50.1M
          0.0000000000000000,
340
50.1M
          0.0000000000000000,
341
50.1M
          -0.0643507165794627f,
342
50.1M
          0.3935103426921022f,
343
50.1M
          0.0657870154914254f,
344
50.1M
          0.0000000000000000,
345
50.1M
          0.4082482904638634f,
346
50.1M
          0.3078822139579031f,
347
50.1M
          0.3852501370925211f,
348
50.1M
          -0.0857401903551927f,
349
50.1M
          -0.4613374887461554f,
350
50.1M
          0.0000000000000000,
351
50.1M
          0.2191868483885728f,
352
50.1M
      },
353
50.1M
      {
354
50.1M
          0.2500000000000000,
355
50.1M
          -0.1014005039375376f,
356
50.1M
          -0.1137907446044814f,
357
50.1M
          -0.1464291867126654f,
358
50.1M
          0.0000000000000000,
359
50.1M
          -0.0643507165794627f,
360
50.1M
          0.0829816309488214f,
361
50.1M
          0.2388977352334547f,
362
50.1M
          -0.3531238544981624f,
363
50.1M
          0.4082482904638630f,
364
50.1M
          -0.4826689115059858f,
365
50.1M
          -0.1741941265991621f,
366
50.1M
          -0.0476868035022928f,
367
50.1M
          0.1253805944856431f,
368
50.1M
          -0.4326608024727457f,
369
50.1M
          -0.2546827712406641f,
370
50.1M
      },
371
50.1M
      {
372
50.1M
          0.2500000000000000,
373
50.1M
          -0.1014005039375374f,
374
50.1M
          0.0000000000000000,
375
50.1M
          0.4251149611657548f,
376
50.1M
          0.0000000000000000,
377
50.1M
          -0.0643507165794626f,
378
50.1M
          -0.4517556589999480f,
379
50.1M
          0.0000000000000000,
380
50.1M
          -0.6035859033230976f,
381
50.1M
          0.0000000000000000,
382
50.1M
          0.0000000000000000,
383
50.1M
          0.0000000000000000,
384
50.1M
          -0.1426608480880724f,
385
50.1M
          -0.1381354035075845f,
386
50.1M
          0.3487520519930227f,
387
50.1M
          0.1135498731499429f,
388
50.1M
      },
389
50.1M
  };
390
391
50.1M
  const HWY_CAPPED(float, 16) d;
392
150M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
100M
    auto scalar = Zero(d);
394
1.70G
    for (size_t j = 0; j < 16; j++) {
395
1.60G
      auto px = Set(d, pixels[j]);
396
1.60G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.60G
      scalar = MulAdd(px, basis, scalar);
398
1.60G
    }
399
100M
    Store(scalar, d, coeffs + i);
400
100M
  }
401
50.1M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
311k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
311k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
311k
      {
102
311k
          0.2500000000000000,
103
311k
          0.8769029297991420f,
104
311k
          0.0000000000000000,
105
311k
          0.0000000000000000,
106
311k
          0.0000000000000000,
107
311k
          -0.4105377591765233f,
108
311k
          0.0000000000000000,
109
311k
          0.0000000000000000,
110
311k
          0.0000000000000000,
111
311k
          0.0000000000000000,
112
311k
          0.0000000000000000,
113
311k
          0.0000000000000000,
114
311k
          0.0000000000000000,
115
311k
          0.0000000000000000,
116
311k
          0.0000000000000000,
117
311k
          0.0000000000000000,
118
311k
      },
119
311k
      {
120
311k
          0.2500000000000000,
121
311k
          0.2206518106944235f,
122
311k
          0.0000000000000000,
123
311k
          0.0000000000000000,
124
311k
          -0.7071067811865474f,
125
311k
          0.6235485373547691f,
126
311k
          0.0000000000000000,
127
311k
          0.0000000000000000,
128
311k
          0.0000000000000000,
129
311k
          0.0000000000000000,
130
311k
          0.0000000000000000,
131
311k
          0.0000000000000000,
132
311k
          0.0000000000000000,
133
311k
          0.0000000000000000,
134
311k
          0.0000000000000000,
135
311k
          0.0000000000000000,
136
311k
      },
137
311k
      {
138
311k
          0.2500000000000000,
139
311k
          -0.1014005039375376f,
140
311k
          0.4067007583026075f,
141
311k
          -0.2125574805828875f,
142
311k
          0.0000000000000000,
143
311k
          -0.0643507165794627f,
144
311k
          -0.4517556589999482f,
145
311k
          -0.3046847507248690f,
146
311k
          0.3017929516615495f,
147
311k
          0.4082482904638627f,
148
311k
          0.1747866975480809f,
149
311k
          -0.2110560104933578f,
150
311k
          -0.1426608480880726f,
151
311k
          -0.1381354035075859f,
152
311k
          -0.1743760259965107f,
153
311k
          0.1135498731499434f,
154
311k
      },
155
311k
      {
156
311k
          0.2500000000000000,
157
311k
          -0.1014005039375375f,
158
311k
          0.4444481661973445f,
159
311k
          0.3085497062849767f,
160
311k
          0.0000000000000000f,
161
311k
          -0.0643507165794627f,
162
311k
          0.1585450355184006f,
163
311k
          0.5112616136591823f,
164
311k
          0.2579236279634118f,
165
311k
          0.0000000000000000,
166
311k
          0.0812611176717539f,
167
311k
          0.1856718091610980f,
168
311k
          -0.3416446842253372f,
169
311k
          0.3302282550303788f,
170
311k
          0.0702790691196284f,
171
311k
          -0.0741750459581035f,
172
311k
      },
173
311k
      {
174
311k
          0.2500000000000000,
175
311k
          0.2206518106944236f,
176
311k
          0.0000000000000000,
177
311k
          0.0000000000000000,
178
311k
          0.7071067811865476f,
179
311k
          0.6235485373547694f,
180
311k
          0.0000000000000000,
181
311k
          0.0000000000000000,
182
311k
          0.0000000000000000,
183
311k
          0.0000000000000000,
184
311k
          0.0000000000000000,
185
311k
          0.0000000000000000,
186
311k
          0.0000000000000000,
187
311k
          0.0000000000000000,
188
311k
          0.0000000000000000,
189
311k
          0.0000000000000000,
190
311k
      },
191
311k
      {
192
311k
          0.2500000000000000,
193
311k
          -0.1014005039375378f,
194
311k
          0.0000000000000000,
195
311k
          0.4706702258572536f,
196
311k
          0.0000000000000000,
197
311k
          -0.0643507165794628f,
198
311k
          -0.0403851516082220f,
199
311k
          0.0000000000000000,
200
311k
          0.1627234014286620f,
201
311k
          0.0000000000000000,
202
311k
          0.0000000000000000,
203
311k
          0.0000000000000000,
204
311k
          0.7367497537172237f,
205
311k
          0.0875511500058708f,
206
311k
          -0.2921026642334881f,
207
311k
          0.1940289303259434f,
208
311k
      },
209
311k
      {
210
311k
          0.2500000000000000,
211
311k
          -0.1014005039375377f,
212
311k
          0.1957439937204294f,
213
311k
          -0.1621205195722993f,
214
311k
          0.0000000000000000,
215
311k
          -0.0643507165794628f,
216
311k
          0.0074182263792424f,
217
311k
          -0.2904801297289980f,
218
311k
          0.0952002265347504f,
219
311k
          0.0000000000000000,
220
311k
          -0.3675398009862027f,
221
311k
          0.4921585901373873f,
222
311k
          0.2462710772207515f,
223
311k
          -0.0794670660590957f,
224
311k
          0.3623817333531167f,
225
311k
          -0.4351904965232280f,
226
311k
      },
227
311k
      {
228
311k
          0.2500000000000000,
229
311k
          -0.1014005039375376f,
230
311k
          0.2929100136981264f,
231
311k
          0.0000000000000000,
232
311k
          0.0000000000000000,
233
311k
          -0.0643507165794627f,
234
311k
          0.3935103426921017f,
235
311k
          -0.0657870154914280f,
236
311k
          0.0000000000000000,
237
311k
          -0.4082482904638628f,
238
311k
          -0.3078822139579090f,
239
311k
          -0.3852501370925192f,
240
311k
          -0.0857401903551931f,
241
311k
          -0.4613374887461511f,
242
311k
          0.0000000000000000,
243
311k
          0.2191868483885747f,
244
311k
      },
245
311k
      {
246
311k
          0.2500000000000000,
247
311k
          -0.1014005039375376f,
248
311k
          -0.4067007583026072f,
249
311k
          -0.2125574805828705f,
250
311k
          0.0000000000000000,
251
311k
          -0.0643507165794627f,
252
311k
          -0.4517556589999464f,
253
311k
          0.3046847507248840f,
254
311k
          0.3017929516615503f,
255
311k
          -0.4082482904638635f,
256
311k
          -0.1747866975480813f,
257
311k
          0.2110560104933581f,
258
311k
          -0.1426608480880734f,
259
311k
          -0.1381354035075829f,
260
311k
          -0.1743760259965108f,
261
311k
          0.1135498731499426f,
262
311k
      },
263
311k
      {
264
311k
          0.2500000000000000,
265
311k
          -0.1014005039375377f,
266
311k
          -0.1957439937204287f,
267
311k
          -0.1621205195722833f,
268
311k
          0.0000000000000000,
269
311k
          -0.0643507165794628f,
270
311k
          0.0074182263792444f,
271
311k
          0.2904801297290076f,
272
311k
          0.0952002265347505f,
273
311k
          0.0000000000000000,
274
311k
          0.3675398009862011f,
275
311k
          -0.4921585901373891f,
276
311k
          0.2462710772207514f,
277
311k
          -0.0794670660591026f,
278
311k
          0.3623817333531165f,
279
311k
          -0.4351904965232251f,
280
311k
      },
281
311k
      {
282
311k
          0.2500000000000000,
283
311k
          -0.1014005039375375f,
284
311k
          0.0000000000000000,
285
311k
          -0.4706702258572528f,
286
311k
          0.0000000000000000,
287
311k
          -0.0643507165794627f,
288
311k
          0.1107416575309343f,
289
311k
          0.0000000000000000,
290
311k
          -0.1627234014286617f,
291
311k
          0.0000000000000000,
292
311k
          0.0000000000000000,
293
311k
          0.0000000000000000,
294
311k
          0.1488339922711357f,
295
311k
          0.4972464710953509f,
296
311k
          0.2921026642334879f,
297
311k
          0.5550443808910661f,
298
311k
      },
299
311k
      {
300
311k
          0.2500000000000000,
301
311k
          -0.1014005039375377f,
302
311k
          0.1137907446044809f,
303
311k
          -0.1464291867126764f,
304
311k
          0.0000000000000000,
305
311k
          -0.0643507165794628f,
306
311k
          0.0829816309488205f,
307
311k
          -0.2388977352334460f,
308
311k
          -0.3531238544981630f,
309
311k
          -0.4082482904638630f,
310
311k
          0.4826689115059883f,
311
311k
          0.1741941265991622f,
312
311k
          -0.0476868035022925f,
313
311k
          0.1253805944856366f,
314
311k
          -0.4326608024727445f,
315
311k
          -0.2546827712406646f,
316
311k
      },
317
311k
      {
318
311k
          0.2500000000000000,
319
311k
          -0.1014005039375377f,
320
311k
          -0.4444481661973438f,
321
311k
          0.3085497062849487f,
322
311k
          0.0000000000000000,
323
311k
          -0.0643507165794628f,
324
311k
          0.1585450355183970f,
325
311k
          -0.5112616136592012f,
326
311k
          0.2579236279634129f,
327
311k
          0.0000000000000000,
328
311k
          -0.0812611176717504f,
329
311k
          -0.1856718091610990f,
330
311k
          -0.3416446842253373f,
331
311k
          0.3302282550303805f,
332
311k
          0.0702790691196282f,
333
311k
          -0.0741750459581023f,
334
311k
      },
335
311k
      {
336
311k
          0.2500000000000000,
337
311k
          -0.1014005039375376f,
338
311k
          -0.2929100136981264f,
339
311k
          0.0000000000000000,
340
311k
          0.0000000000000000,
341
311k
          -0.0643507165794627f,
342
311k
          0.3935103426921022f,
343
311k
          0.0657870154914254f,
344
311k
          0.0000000000000000,
345
311k
          0.4082482904638634f,
346
311k
          0.3078822139579031f,
347
311k
          0.3852501370925211f,
348
311k
          -0.0857401903551927f,
349
311k
          -0.4613374887461554f,
350
311k
          0.0000000000000000,
351
311k
          0.2191868483885728f,
352
311k
      },
353
311k
      {
354
311k
          0.2500000000000000,
355
311k
          -0.1014005039375376f,
356
311k
          -0.1137907446044814f,
357
311k
          -0.1464291867126654f,
358
311k
          0.0000000000000000,
359
311k
          -0.0643507165794627f,
360
311k
          0.0829816309488214f,
361
311k
          0.2388977352334547f,
362
311k
          -0.3531238544981624f,
363
311k
          0.4082482904638630f,
364
311k
          -0.4826689115059858f,
365
311k
          -0.1741941265991621f,
366
311k
          -0.0476868035022928f,
367
311k
          0.1253805944856431f,
368
311k
          -0.4326608024727457f,
369
311k
          -0.2546827712406641f,
370
311k
      },
371
311k
      {
372
311k
          0.2500000000000000,
373
311k
          -0.1014005039375374f,
374
311k
          0.0000000000000000,
375
311k
          0.4251149611657548f,
376
311k
          0.0000000000000000,
377
311k
          -0.0643507165794626f,
378
311k
          -0.4517556589999480f,
379
311k
          0.0000000000000000,
380
311k
          -0.6035859033230976f,
381
311k
          0.0000000000000000,
382
311k
          0.0000000000000000,
383
311k
          0.0000000000000000,
384
311k
          -0.1426608480880724f,
385
311k
          -0.1381354035075845f,
386
311k
          0.3487520519930227f,
387
311k
          0.1135498731499429f,
388
311k
      },
389
311k
  };
390
391
311k
  const HWY_CAPPED(float, 16) d;
392
933k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
622k
    auto scalar = Zero(d);
394
10.5M
    for (size_t j = 0; j < 16; j++) {
395
9.95M
      auto px = Set(d, pixels[j]);
396
9.95M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
9.95M
      scalar = MulAdd(px, basis, scalar);
398
9.95M
    }
399
622k
    Store(scalar, d, coeffs + i);
400
622k
  }
401
311k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
311k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
311k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
311k
      {
102
311k
          0.2500000000000000,
103
311k
          0.8769029297991420f,
104
311k
          0.0000000000000000,
105
311k
          0.0000000000000000,
106
311k
          0.0000000000000000,
107
311k
          -0.4105377591765233f,
108
311k
          0.0000000000000000,
109
311k
          0.0000000000000000,
110
311k
          0.0000000000000000,
111
311k
          0.0000000000000000,
112
311k
          0.0000000000000000,
113
311k
          0.0000000000000000,
114
311k
          0.0000000000000000,
115
311k
          0.0000000000000000,
116
311k
          0.0000000000000000,
117
311k
          0.0000000000000000,
118
311k
      },
119
311k
      {
120
311k
          0.2500000000000000,
121
311k
          0.2206518106944235f,
122
311k
          0.0000000000000000,
123
311k
          0.0000000000000000,
124
311k
          -0.7071067811865474f,
125
311k
          0.6235485373547691f,
126
311k
          0.0000000000000000,
127
311k
          0.0000000000000000,
128
311k
          0.0000000000000000,
129
311k
          0.0000000000000000,
130
311k
          0.0000000000000000,
131
311k
          0.0000000000000000,
132
311k
          0.0000000000000000,
133
311k
          0.0000000000000000,
134
311k
          0.0000000000000000,
135
311k
          0.0000000000000000,
136
311k
      },
137
311k
      {
138
311k
          0.2500000000000000,
139
311k
          -0.1014005039375376f,
140
311k
          0.4067007583026075f,
141
311k
          -0.2125574805828875f,
142
311k
          0.0000000000000000,
143
311k
          -0.0643507165794627f,
144
311k
          -0.4517556589999482f,
145
311k
          -0.3046847507248690f,
146
311k
          0.3017929516615495f,
147
311k
          0.4082482904638627f,
148
311k
          0.1747866975480809f,
149
311k
          -0.2110560104933578f,
150
311k
          -0.1426608480880726f,
151
311k
          -0.1381354035075859f,
152
311k
          -0.1743760259965107f,
153
311k
          0.1135498731499434f,
154
311k
      },
155
311k
      {
156
311k
          0.2500000000000000,
157
311k
          -0.1014005039375375f,
158
311k
          0.4444481661973445f,
159
311k
          0.3085497062849767f,
160
311k
          0.0000000000000000f,
161
311k
          -0.0643507165794627f,
162
311k
          0.1585450355184006f,
163
311k
          0.5112616136591823f,
164
311k
          0.2579236279634118f,
165
311k
          0.0000000000000000,
166
311k
          0.0812611176717539f,
167
311k
          0.1856718091610980f,
168
311k
          -0.3416446842253372f,
169
311k
          0.3302282550303788f,
170
311k
          0.0702790691196284f,
171
311k
          -0.0741750459581035f,
172
311k
      },
173
311k
      {
174
311k
          0.2500000000000000,
175
311k
          0.2206518106944236f,
176
311k
          0.0000000000000000,
177
311k
          0.0000000000000000,
178
311k
          0.7071067811865476f,
179
311k
          0.6235485373547694f,
180
311k
          0.0000000000000000,
181
311k
          0.0000000000000000,
182
311k
          0.0000000000000000,
183
311k
          0.0000000000000000,
184
311k
          0.0000000000000000,
185
311k
          0.0000000000000000,
186
311k
          0.0000000000000000,
187
311k
          0.0000000000000000,
188
311k
          0.0000000000000000,
189
311k
          0.0000000000000000,
190
311k
      },
191
311k
      {
192
311k
          0.2500000000000000,
193
311k
          -0.1014005039375378f,
194
311k
          0.0000000000000000,
195
311k
          0.4706702258572536f,
196
311k
          0.0000000000000000,
197
311k
          -0.0643507165794628f,
198
311k
          -0.0403851516082220f,
199
311k
          0.0000000000000000,
200
311k
          0.1627234014286620f,
201
311k
          0.0000000000000000,
202
311k
          0.0000000000000000,
203
311k
          0.0000000000000000,
204
311k
          0.7367497537172237f,
205
311k
          0.0875511500058708f,
206
311k
          -0.2921026642334881f,
207
311k
          0.1940289303259434f,
208
311k
      },
209
311k
      {
210
311k
          0.2500000000000000,
211
311k
          -0.1014005039375377f,
212
311k
          0.1957439937204294f,
213
311k
          -0.1621205195722993f,
214
311k
          0.0000000000000000,
215
311k
          -0.0643507165794628f,
216
311k
          0.0074182263792424f,
217
311k
          -0.2904801297289980f,
218
311k
          0.0952002265347504f,
219
311k
          0.0000000000000000,
220
311k
          -0.3675398009862027f,
221
311k
          0.4921585901373873f,
222
311k
          0.2462710772207515f,
223
311k
          -0.0794670660590957f,
224
311k
          0.3623817333531167f,
225
311k
          -0.4351904965232280f,
226
311k
      },
227
311k
      {
228
311k
          0.2500000000000000,
229
311k
          -0.1014005039375376f,
230
311k
          0.2929100136981264f,
231
311k
          0.0000000000000000,
232
311k
          0.0000000000000000,
233
311k
          -0.0643507165794627f,
234
311k
          0.3935103426921017f,
235
311k
          -0.0657870154914280f,
236
311k
          0.0000000000000000,
237
311k
          -0.4082482904638628f,
238
311k
          -0.3078822139579090f,
239
311k
          -0.3852501370925192f,
240
311k
          -0.0857401903551931f,
241
311k
          -0.4613374887461511f,
242
311k
          0.0000000000000000,
243
311k
          0.2191868483885747f,
244
311k
      },
245
311k
      {
246
311k
          0.2500000000000000,
247
311k
          -0.1014005039375376f,
248
311k
          -0.4067007583026072f,
249
311k
          -0.2125574805828705f,
250
311k
          0.0000000000000000,
251
311k
          -0.0643507165794627f,
252
311k
          -0.4517556589999464f,
253
311k
          0.3046847507248840f,
254
311k
          0.3017929516615503f,
255
311k
          -0.4082482904638635f,
256
311k
          -0.1747866975480813f,
257
311k
          0.2110560104933581f,
258
311k
          -0.1426608480880734f,
259
311k
          -0.1381354035075829f,
260
311k
          -0.1743760259965108f,
261
311k
          0.1135498731499426f,
262
311k
      },
263
311k
      {
264
311k
          0.2500000000000000,
265
311k
          -0.1014005039375377f,
266
311k
          -0.1957439937204287f,
267
311k
          -0.1621205195722833f,
268
311k
          0.0000000000000000,
269
311k
          -0.0643507165794628f,
270
311k
          0.0074182263792444f,
271
311k
          0.2904801297290076f,
272
311k
          0.0952002265347505f,
273
311k
          0.0000000000000000,
274
311k
          0.3675398009862011f,
275
311k
          -0.4921585901373891f,
276
311k
          0.2462710772207514f,
277
311k
          -0.0794670660591026f,
278
311k
          0.3623817333531165f,
279
311k
          -0.4351904965232251f,
280
311k
      },
281
311k
      {
282
311k
          0.2500000000000000,
283
311k
          -0.1014005039375375f,
284
311k
          0.0000000000000000,
285
311k
          -0.4706702258572528f,
286
311k
          0.0000000000000000,
287
311k
          -0.0643507165794627f,
288
311k
          0.1107416575309343f,
289
311k
          0.0000000000000000,
290
311k
          -0.1627234014286617f,
291
311k
          0.0000000000000000,
292
311k
          0.0000000000000000,
293
311k
          0.0000000000000000,
294
311k
          0.1488339922711357f,
295
311k
          0.4972464710953509f,
296
311k
          0.2921026642334879f,
297
311k
          0.5550443808910661f,
298
311k
      },
299
311k
      {
300
311k
          0.2500000000000000,
301
311k
          -0.1014005039375377f,
302
311k
          0.1137907446044809f,
303
311k
          -0.1464291867126764f,
304
311k
          0.0000000000000000,
305
311k
          -0.0643507165794628f,
306
311k
          0.0829816309488205f,
307
311k
          -0.2388977352334460f,
308
311k
          -0.3531238544981630f,
309
311k
          -0.4082482904638630f,
310
311k
          0.4826689115059883f,
311
311k
          0.1741941265991622f,
312
311k
          -0.0476868035022925f,
313
311k
          0.1253805944856366f,
314
311k
          -0.4326608024727445f,
315
311k
          -0.2546827712406646f,
316
311k
      },
317
311k
      {
318
311k
          0.2500000000000000,
319
311k
          -0.1014005039375377f,
320
311k
          -0.4444481661973438f,
321
311k
          0.3085497062849487f,
322
311k
          0.0000000000000000,
323
311k
          -0.0643507165794628f,
324
311k
          0.1585450355183970f,
325
311k
          -0.5112616136592012f,
326
311k
          0.2579236279634129f,
327
311k
          0.0000000000000000,
328
311k
          -0.0812611176717504f,
329
311k
          -0.1856718091610990f,
330
311k
          -0.3416446842253373f,
331
311k
          0.3302282550303805f,
332
311k
          0.0702790691196282f,
333
311k
          -0.0741750459581023f,
334
311k
      },
335
311k
      {
336
311k
          0.2500000000000000,
337
311k
          -0.1014005039375376f,
338
311k
          -0.2929100136981264f,
339
311k
          0.0000000000000000,
340
311k
          0.0000000000000000,
341
311k
          -0.0643507165794627f,
342
311k
          0.3935103426921022f,
343
311k
          0.0657870154914254f,
344
311k
          0.0000000000000000,
345
311k
          0.4082482904638634f,
346
311k
          0.3078822139579031f,
347
311k
          0.3852501370925211f,
348
311k
          -0.0857401903551927f,
349
311k
          -0.4613374887461554f,
350
311k
          0.0000000000000000,
351
311k
          0.2191868483885728f,
352
311k
      },
353
311k
      {
354
311k
          0.2500000000000000,
355
311k
          -0.1014005039375376f,
356
311k
          -0.1137907446044814f,
357
311k
          -0.1464291867126654f,
358
311k
          0.0000000000000000,
359
311k
          -0.0643507165794627f,
360
311k
          0.0829816309488214f,
361
311k
          0.2388977352334547f,
362
311k
          -0.3531238544981624f,
363
311k
          0.4082482904638630f,
364
311k
          -0.4826689115059858f,
365
311k
          -0.1741941265991621f,
366
311k
          -0.0476868035022928f,
367
311k
          0.1253805944856431f,
368
311k
          -0.4326608024727457f,
369
311k
          -0.2546827712406641f,
370
311k
      },
371
311k
      {
372
311k
          0.2500000000000000,
373
311k
          -0.1014005039375374f,
374
311k
          0.0000000000000000,
375
311k
          0.4251149611657548f,
376
311k
          0.0000000000000000,
377
311k
          -0.0643507165794626f,
378
311k
          -0.4517556589999480f,
379
311k
          0.0000000000000000,
380
311k
          -0.6035859033230976f,
381
311k
          0.0000000000000000,
382
311k
          0.0000000000000000,
383
311k
          0.0000000000000000,
384
311k
          -0.1426608480880724f,
385
311k
          -0.1381354035075845f,
386
311k
          0.3487520519930227f,
387
311k
          0.1135498731499429f,
388
311k
      },
389
311k
  };
390
391
311k
  const HWY_CAPPED(float, 16) d;
392
933k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
622k
    auto scalar = Zero(d);
394
10.5M
    for (size_t j = 0; j < 16; j++) {
395
9.95M
      auto px = Set(d, pixels[j]);
396
9.95M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
9.95M
      scalar = MulAdd(px, basis, scalar);
398
9.95M
    }
399
622k
    Store(scalar, d, coeffs + i);
400
622k
  }
401
311k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
49.5M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
49.5M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
49.5M
      {
102
49.5M
          0.2500000000000000,
103
49.5M
          0.8769029297991420f,
104
49.5M
          0.0000000000000000,
105
49.5M
          0.0000000000000000,
106
49.5M
          0.0000000000000000,
107
49.5M
          -0.4105377591765233f,
108
49.5M
          0.0000000000000000,
109
49.5M
          0.0000000000000000,
110
49.5M
          0.0000000000000000,
111
49.5M
          0.0000000000000000,
112
49.5M
          0.0000000000000000,
113
49.5M
          0.0000000000000000,
114
49.5M
          0.0000000000000000,
115
49.5M
          0.0000000000000000,
116
49.5M
          0.0000000000000000,
117
49.5M
          0.0000000000000000,
118
49.5M
      },
119
49.5M
      {
120
49.5M
          0.2500000000000000,
121
49.5M
          0.2206518106944235f,
122
49.5M
          0.0000000000000000,
123
49.5M
          0.0000000000000000,
124
49.5M
          -0.7071067811865474f,
125
49.5M
          0.6235485373547691f,
126
49.5M
          0.0000000000000000,
127
49.5M
          0.0000000000000000,
128
49.5M
          0.0000000000000000,
129
49.5M
          0.0000000000000000,
130
49.5M
          0.0000000000000000,
131
49.5M
          0.0000000000000000,
132
49.5M
          0.0000000000000000,
133
49.5M
          0.0000000000000000,
134
49.5M
          0.0000000000000000,
135
49.5M
          0.0000000000000000,
136
49.5M
      },
137
49.5M
      {
138
49.5M
          0.2500000000000000,
139
49.5M
          -0.1014005039375376f,
140
49.5M
          0.4067007583026075f,
141
49.5M
          -0.2125574805828875f,
142
49.5M
          0.0000000000000000,
143
49.5M
          -0.0643507165794627f,
144
49.5M
          -0.4517556589999482f,
145
49.5M
          -0.3046847507248690f,
146
49.5M
          0.3017929516615495f,
147
49.5M
          0.4082482904638627f,
148
49.5M
          0.1747866975480809f,
149
49.5M
          -0.2110560104933578f,
150
49.5M
          -0.1426608480880726f,
151
49.5M
          -0.1381354035075859f,
152
49.5M
          -0.1743760259965107f,
153
49.5M
          0.1135498731499434f,
154
49.5M
      },
155
49.5M
      {
156
49.5M
          0.2500000000000000,
157
49.5M
          -0.1014005039375375f,
158
49.5M
          0.4444481661973445f,
159
49.5M
          0.3085497062849767f,
160
49.5M
          0.0000000000000000f,
161
49.5M
          -0.0643507165794627f,
162
49.5M
          0.1585450355184006f,
163
49.5M
          0.5112616136591823f,
164
49.5M
          0.2579236279634118f,
165
49.5M
          0.0000000000000000,
166
49.5M
          0.0812611176717539f,
167
49.5M
          0.1856718091610980f,
168
49.5M
          -0.3416446842253372f,
169
49.5M
          0.3302282550303788f,
170
49.5M
          0.0702790691196284f,
171
49.5M
          -0.0741750459581035f,
172
49.5M
      },
173
49.5M
      {
174
49.5M
          0.2500000000000000,
175
49.5M
          0.2206518106944236f,
176
49.5M
          0.0000000000000000,
177
49.5M
          0.0000000000000000,
178
49.5M
          0.7071067811865476f,
179
49.5M
          0.6235485373547694f,
180
49.5M
          0.0000000000000000,
181
49.5M
          0.0000000000000000,
182
49.5M
          0.0000000000000000,
183
49.5M
          0.0000000000000000,
184
49.5M
          0.0000000000000000,
185
49.5M
          0.0000000000000000,
186
49.5M
          0.0000000000000000,
187
49.5M
          0.0000000000000000,
188
49.5M
          0.0000000000000000,
189
49.5M
          0.0000000000000000,
190
49.5M
      },
191
49.5M
      {
192
49.5M
          0.2500000000000000,
193
49.5M
          -0.1014005039375378f,
194
49.5M
          0.0000000000000000,
195
49.5M
          0.4706702258572536f,
196
49.5M
          0.0000000000000000,
197
49.5M
          -0.0643507165794628f,
198
49.5M
          -0.0403851516082220f,
199
49.5M
          0.0000000000000000,
200
49.5M
          0.1627234014286620f,
201
49.5M
          0.0000000000000000,
202
49.5M
          0.0000000000000000,
203
49.5M
          0.0000000000000000,
204
49.5M
          0.7367497537172237f,
205
49.5M
          0.0875511500058708f,
206
49.5M
          -0.2921026642334881f,
207
49.5M
          0.1940289303259434f,
208
49.5M
      },
209
49.5M
      {
210
49.5M
          0.2500000000000000,
211
49.5M
          -0.1014005039375377f,
212
49.5M
          0.1957439937204294f,
213
49.5M
          -0.1621205195722993f,
214
49.5M
          0.0000000000000000,
215
49.5M
          -0.0643507165794628f,
216
49.5M
          0.0074182263792424f,
217
49.5M
          -0.2904801297289980f,
218
49.5M
          0.0952002265347504f,
219
49.5M
          0.0000000000000000,
220
49.5M
          -0.3675398009862027f,
221
49.5M
          0.4921585901373873f,
222
49.5M
          0.2462710772207515f,
223
49.5M
          -0.0794670660590957f,
224
49.5M
          0.3623817333531167f,
225
49.5M
          -0.4351904965232280f,
226
49.5M
      },
227
49.5M
      {
228
49.5M
          0.2500000000000000,
229
49.5M
          -0.1014005039375376f,
230
49.5M
          0.2929100136981264f,
231
49.5M
          0.0000000000000000,
232
49.5M
          0.0000000000000000,
233
49.5M
          -0.0643507165794627f,
234
49.5M
          0.3935103426921017f,
235
49.5M
          -0.0657870154914280f,
236
49.5M
          0.0000000000000000,
237
49.5M
          -0.4082482904638628f,
238
49.5M
          -0.3078822139579090f,
239
49.5M
          -0.3852501370925192f,
240
49.5M
          -0.0857401903551931f,
241
49.5M
          -0.4613374887461511f,
242
49.5M
          0.0000000000000000,
243
49.5M
          0.2191868483885747f,
244
49.5M
      },
245
49.5M
      {
246
49.5M
          0.2500000000000000,
247
49.5M
          -0.1014005039375376f,
248
49.5M
          -0.4067007583026072f,
249
49.5M
          -0.2125574805828705f,
250
49.5M
          0.0000000000000000,
251
49.5M
          -0.0643507165794627f,
252
49.5M
          -0.4517556589999464f,
253
49.5M
          0.3046847507248840f,
254
49.5M
          0.3017929516615503f,
255
49.5M
          -0.4082482904638635f,
256
49.5M
          -0.1747866975480813f,
257
49.5M
          0.2110560104933581f,
258
49.5M
          -0.1426608480880734f,
259
49.5M
          -0.1381354035075829f,
260
49.5M
          -0.1743760259965108f,
261
49.5M
          0.1135498731499426f,
262
49.5M
      },
263
49.5M
      {
264
49.5M
          0.2500000000000000,
265
49.5M
          -0.1014005039375377f,
266
49.5M
          -0.1957439937204287f,
267
49.5M
          -0.1621205195722833f,
268
49.5M
          0.0000000000000000,
269
49.5M
          -0.0643507165794628f,
270
49.5M
          0.0074182263792444f,
271
49.5M
          0.2904801297290076f,
272
49.5M
          0.0952002265347505f,
273
49.5M
          0.0000000000000000,
274
49.5M
          0.3675398009862011f,
275
49.5M
          -0.4921585901373891f,
276
49.5M
          0.2462710772207514f,
277
49.5M
          -0.0794670660591026f,
278
49.5M
          0.3623817333531165f,
279
49.5M
          -0.4351904965232251f,
280
49.5M
      },
281
49.5M
      {
282
49.5M
          0.2500000000000000,
283
49.5M
          -0.1014005039375375f,
284
49.5M
          0.0000000000000000,
285
49.5M
          -0.4706702258572528f,
286
49.5M
          0.0000000000000000,
287
49.5M
          -0.0643507165794627f,
288
49.5M
          0.1107416575309343f,
289
49.5M
          0.0000000000000000,
290
49.5M
          -0.1627234014286617f,
291
49.5M
          0.0000000000000000,
292
49.5M
          0.0000000000000000,
293
49.5M
          0.0000000000000000,
294
49.5M
          0.1488339922711357f,
295
49.5M
          0.4972464710953509f,
296
49.5M
          0.2921026642334879f,
297
49.5M
          0.5550443808910661f,
298
49.5M
      },
299
49.5M
      {
300
49.5M
          0.2500000000000000,
301
49.5M
          -0.1014005039375377f,
302
49.5M
          0.1137907446044809f,
303
49.5M
          -0.1464291867126764f,
304
49.5M
          0.0000000000000000,
305
49.5M
          -0.0643507165794628f,
306
49.5M
          0.0829816309488205f,
307
49.5M
          -0.2388977352334460f,
308
49.5M
          -0.3531238544981630f,
309
49.5M
          -0.4082482904638630f,
310
49.5M
          0.4826689115059883f,
311
49.5M
          0.1741941265991622f,
312
49.5M
          -0.0476868035022925f,
313
49.5M
          0.1253805944856366f,
314
49.5M
          -0.4326608024727445f,
315
49.5M
          -0.2546827712406646f,
316
49.5M
      },
317
49.5M
      {
318
49.5M
          0.2500000000000000,
319
49.5M
          -0.1014005039375377f,
320
49.5M
          -0.4444481661973438f,
321
49.5M
          0.3085497062849487f,
322
49.5M
          0.0000000000000000,
323
49.5M
          -0.0643507165794628f,
324
49.5M
          0.1585450355183970f,
325
49.5M
          -0.5112616136592012f,
326
49.5M
          0.2579236279634129f,
327
49.5M
          0.0000000000000000,
328
49.5M
          -0.0812611176717504f,
329
49.5M
          -0.1856718091610990f,
330
49.5M
          -0.3416446842253373f,
331
49.5M
          0.3302282550303805f,
332
49.5M
          0.0702790691196282f,
333
49.5M
          -0.0741750459581023f,
334
49.5M
      },
335
49.5M
      {
336
49.5M
          0.2500000000000000,
337
49.5M
          -0.1014005039375376f,
338
49.5M
          -0.2929100136981264f,
339
49.5M
          0.0000000000000000,
340
49.5M
          0.0000000000000000,
341
49.5M
          -0.0643507165794627f,
342
49.5M
          0.3935103426921022f,
343
49.5M
          0.0657870154914254f,
344
49.5M
          0.0000000000000000,
345
49.5M
          0.4082482904638634f,
346
49.5M
          0.3078822139579031f,
347
49.5M
          0.3852501370925211f,
348
49.5M
          -0.0857401903551927f,
349
49.5M
          -0.4613374887461554f,
350
49.5M
          0.0000000000000000,
351
49.5M
          0.2191868483885728f,
352
49.5M
      },
353
49.5M
      {
354
49.5M
          0.2500000000000000,
355
49.5M
          -0.1014005039375376f,
356
49.5M
          -0.1137907446044814f,
357
49.5M
          -0.1464291867126654f,
358
49.5M
          0.0000000000000000,
359
49.5M
          -0.0643507165794627f,
360
49.5M
          0.0829816309488214f,
361
49.5M
          0.2388977352334547f,
362
49.5M
          -0.3531238544981624f,
363
49.5M
          0.4082482904638630f,
364
49.5M
          -0.4826689115059858f,
365
49.5M
          -0.1741941265991621f,
366
49.5M
          -0.0476868035022928f,
367
49.5M
          0.1253805944856431f,
368
49.5M
          -0.4326608024727457f,
369
49.5M
          -0.2546827712406641f,
370
49.5M
      },
371
49.5M
      {
372
49.5M
          0.2500000000000000,
373
49.5M
          -0.1014005039375374f,
374
49.5M
          0.0000000000000000,
375
49.5M
          0.4251149611657548f,
376
49.5M
          0.0000000000000000,
377
49.5M
          -0.0643507165794626f,
378
49.5M
          -0.4517556589999480f,
379
49.5M
          0.0000000000000000,
380
49.5M
          -0.6035859033230976f,
381
49.5M
          0.0000000000000000,
382
49.5M
          0.0000000000000000,
383
49.5M
          0.0000000000000000,
384
49.5M
          -0.1426608480880724f,
385
49.5M
          -0.1381354035075845f,
386
49.5M
          0.3487520519930227f,
387
49.5M
          0.1135498731499429f,
388
49.5M
      },
389
49.5M
  };
390
391
49.5M
  const HWY_CAPPED(float, 16) d;
392
148M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
99.1M
    auto scalar = Zero(d);
394
1.68G
    for (size_t j = 0; j < 16; j++) {
395
1.58G
      auto px = Set(d, pixels[j]);
396
1.58G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.58G
      scalar = MulAdd(px, basis, scalar);
398
1.58G
    }
399
99.1M
    Store(scalar, d, coeffs + i);
400
99.1M
  }
401
49.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
50.1M
                            float* JXL_RESTRICT coefficients) {
411
50.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
50.1M
  size_t afv_x = afv_kind & 1;
413
50.1M
  size_t afv_y = afv_kind / 2;
414
50.1M
  HWY_ALIGN float block[4 * 8] = {};
415
250M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.00G
    for (size_t ix = 0; ix < 4; ix++) {
417
802M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
802M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
802M
    }
420
200M
  }
421
  // AFV coefficients in (even, even) positions.
422
50.1M
  HWY_ALIGN float coeff[4 * 4];
423
50.1M
  AFVDCT4x4(block, coeff);
424
250M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.00G
    for (size_t ix = 0; ix < 4; ix++) {
426
802M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
802M
    }
428
200M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
50.1M
  ComputeScaledDCT<4, 4>()(
431
50.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
50.1M
              pixels_stride),
433
50.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
250M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.80G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.60G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.60G
    }
439
200M
  }
440
  // 4x8 DCT of the other half of the block.
441
50.1M
  ComputeScaledDCT<4, 8>()(
442
50.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
50.1M
      block, scratch_space);
444
250M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.80G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.60G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.60G
    }
448
200M
  }
449
50.1M
  float block00 = coefficients[0] * 0.25f;
450
50.1M
  float block01 = coefficients[1];
451
50.1M
  float block10 = coefficients[8];
452
50.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
50.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
50.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
50.1M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
104k
                            float* JXL_RESTRICT coefficients) {
411
104k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
104k
  size_t afv_x = afv_kind & 1;
413
104k
  size_t afv_y = afv_kind / 2;
414
104k
  HWY_ALIGN float block[4 * 8] = {};
415
521k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.08M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.66M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.66M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.66M
    }
420
417k
  }
421
  // AFV coefficients in (even, even) positions.
422
104k
  HWY_ALIGN float coeff[4 * 4];
423
104k
  AFVDCT4x4(block, coeff);
424
521k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.08M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.66M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.66M
    }
428
417k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
104k
  ComputeScaledDCT<4, 4>()(
431
104k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
104k
              pixels_stride),
433
104k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
521k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.75M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.33M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.33M
    }
439
417k
  }
440
  // 4x8 DCT of the other half of the block.
441
104k
  ComputeScaledDCT<4, 8>()(
442
104k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
104k
      block, scratch_space);
444
521k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.75M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.33M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.33M
    }
448
417k
  }
449
104k
  float block00 = coefficients[0] * 0.25f;
450
104k
  float block01 = coefficients[1];
451
104k
  float block10 = coefficients[8];
452
104k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
104k
  coefficients[1] = (block00 - block01) * 0.5f;
454
104k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
104k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
57.4k
                            float* JXL_RESTRICT coefficients) {
411
57.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
57.4k
  size_t afv_x = afv_kind & 1;
413
57.4k
  size_t afv_y = afv_kind / 2;
414
57.4k
  HWY_ALIGN float block[4 * 8] = {};
415
287k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.14M
    for (size_t ix = 0; ix < 4; ix++) {
417
919k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
919k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
919k
    }
420
229k
  }
421
  // AFV coefficients in (even, even) positions.
422
57.4k
  HWY_ALIGN float coeff[4 * 4];
423
57.4k
  AFVDCT4x4(block, coeff);
424
287k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.14M
    for (size_t ix = 0; ix < 4; ix++) {
426
919k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
919k
    }
428
229k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
57.4k
  ComputeScaledDCT<4, 4>()(
431
57.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
57.4k
              pixels_stride),
433
57.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
287k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.06M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.83M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.83M
    }
439
229k
  }
440
  // 4x8 DCT of the other half of the block.
441
57.4k
  ComputeScaledDCT<4, 8>()(
442
57.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
57.4k
      block, scratch_space);
444
287k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.06M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.83M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.83M
    }
448
229k
  }
449
57.4k
  float block00 = coefficients[0] * 0.25f;
450
57.4k
  float block01 = coefficients[1];
451
57.4k
  float block10 = coefficients[8];
452
57.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
57.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
57.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
57.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
72.9k
                            float* JXL_RESTRICT coefficients) {
411
72.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
72.9k
  size_t afv_x = afv_kind & 1;
413
72.9k
  size_t afv_y = afv_kind / 2;
414
72.9k
  HWY_ALIGN float block[4 * 8] = {};
415
364k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.45M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.16M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.16M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.16M
    }
420
291k
  }
421
  // AFV coefficients in (even, even) positions.
422
72.9k
  HWY_ALIGN float coeff[4 * 4];
423
72.9k
  AFVDCT4x4(block, coeff);
424
364k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.45M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.16M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.16M
    }
428
291k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
72.9k
  ComputeScaledDCT<4, 4>()(
431
72.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
72.9k
              pixels_stride),
433
72.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
364k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.62M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.33M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.33M
    }
439
291k
  }
440
  // 4x8 DCT of the other half of the block.
441
72.9k
  ComputeScaledDCT<4, 8>()(
442
72.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
72.9k
      block, scratch_space);
444
364k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.62M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.33M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.33M
    }
448
291k
  }
449
72.9k
  float block00 = coefficients[0] * 0.25f;
450
72.9k
  float block01 = coefficients[1];
451
72.9k
  float block10 = coefficients[8];
452
72.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
72.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
72.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
72.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
76.5k
                            float* JXL_RESTRICT coefficients) {
411
76.5k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
76.5k
  size_t afv_x = afv_kind & 1;
413
76.5k
  size_t afv_y = afv_kind / 2;
414
76.5k
  HWY_ALIGN float block[4 * 8] = {};
415
382k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.53M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.22M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.22M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.22M
    }
420
306k
  }
421
  // AFV coefficients in (even, even) positions.
422
76.5k
  HWY_ALIGN float coeff[4 * 4];
423
76.5k
  AFVDCT4x4(block, coeff);
424
382k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.53M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.22M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.22M
    }
428
306k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
76.5k
  ComputeScaledDCT<4, 4>()(
431
76.5k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
76.5k
              pixels_stride),
433
76.5k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
382k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.75M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.44M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.44M
    }
439
306k
  }
440
  // 4x8 DCT of the other half of the block.
441
76.5k
  ComputeScaledDCT<4, 8>()(
442
76.5k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
76.5k
      block, scratch_space);
444
382k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.75M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.44M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.44M
    }
448
306k
  }
449
76.5k
  float block00 = coefficients[0] * 0.25f;
450
76.5k
  float block01 = coefficients[1];
451
76.5k
  float block10 = coefficients[8];
452
76.5k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
76.5k
  coefficients[1] = (block00 - block01) * 0.5f;
454
76.5k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
76.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
104k
                            float* JXL_RESTRICT coefficients) {
411
104k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
104k
  size_t afv_x = afv_kind & 1;
413
104k
  size_t afv_y = afv_kind / 2;
414
104k
  HWY_ALIGN float block[4 * 8] = {};
415
521k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.08M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.66M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.66M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.66M
    }
420
417k
  }
421
  // AFV coefficients in (even, even) positions.
422
104k
  HWY_ALIGN float coeff[4 * 4];
423
104k
  AFVDCT4x4(block, coeff);
424
521k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.08M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.66M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.66M
    }
428
417k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
104k
  ComputeScaledDCT<4, 4>()(
431
104k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
104k
              pixels_stride),
433
104k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
521k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.75M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.33M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.33M
    }
439
417k
  }
440
  // 4x8 DCT of the other half of the block.
441
104k
  ComputeScaledDCT<4, 8>()(
442
104k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
104k
      block, scratch_space);
444
521k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.75M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.33M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.33M
    }
448
417k
  }
449
104k
  float block00 = coefficients[0] * 0.25f;
450
104k
  float block01 = coefficients[1];
451
104k
  float block10 = coefficients[8];
452
104k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
104k
  coefficients[1] = (block00 - block01) * 0.5f;
454
104k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
104k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
57.4k
                            float* JXL_RESTRICT coefficients) {
411
57.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
57.4k
  size_t afv_x = afv_kind & 1;
413
57.4k
  size_t afv_y = afv_kind / 2;
414
57.4k
  HWY_ALIGN float block[4 * 8] = {};
415
287k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.14M
    for (size_t ix = 0; ix < 4; ix++) {
417
919k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
919k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
919k
    }
420
229k
  }
421
  // AFV coefficients in (even, even) positions.
422
57.4k
  HWY_ALIGN float coeff[4 * 4];
423
57.4k
  AFVDCT4x4(block, coeff);
424
287k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.14M
    for (size_t ix = 0; ix < 4; ix++) {
426
919k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
919k
    }
428
229k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
57.4k
  ComputeScaledDCT<4, 4>()(
431
57.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
57.4k
              pixels_stride),
433
57.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
287k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.06M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.83M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.83M
    }
439
229k
  }
440
  // 4x8 DCT of the other half of the block.
441
57.4k
  ComputeScaledDCT<4, 8>()(
442
57.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
57.4k
      block, scratch_space);
444
287k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.06M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.83M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.83M
    }
448
229k
  }
449
57.4k
  float block00 = coefficients[0] * 0.25f;
450
57.4k
  float block01 = coefficients[1];
451
57.4k
  float block10 = coefficients[8];
452
57.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
57.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
57.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
57.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
72.9k
                            float* JXL_RESTRICT coefficients) {
411
72.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
72.9k
  size_t afv_x = afv_kind & 1;
413
72.9k
  size_t afv_y = afv_kind / 2;
414
72.9k
  HWY_ALIGN float block[4 * 8] = {};
415
364k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.45M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.16M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.16M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.16M
    }
420
291k
  }
421
  // AFV coefficients in (even, even) positions.
422
72.9k
  HWY_ALIGN float coeff[4 * 4];
423
72.9k
  AFVDCT4x4(block, coeff);
424
364k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.45M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.16M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.16M
    }
428
291k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
72.9k
  ComputeScaledDCT<4, 4>()(
431
72.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
72.9k
              pixels_stride),
433
72.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
364k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.62M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.33M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.33M
    }
439
291k
  }
440
  // 4x8 DCT of the other half of the block.
441
72.9k
  ComputeScaledDCT<4, 8>()(
442
72.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
72.9k
      block, scratch_space);
444
364k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.62M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.33M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.33M
    }
448
291k
  }
449
72.9k
  float block00 = coefficients[0] * 0.25f;
450
72.9k
  float block01 = coefficients[1];
451
72.9k
  float block10 = coefficients[8];
452
72.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
72.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
72.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
72.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
76.5k
                            float* JXL_RESTRICT coefficients) {
411
76.5k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
76.5k
  size_t afv_x = afv_kind & 1;
413
76.5k
  size_t afv_y = afv_kind / 2;
414
76.5k
  HWY_ALIGN float block[4 * 8] = {};
415
382k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.53M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.22M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.22M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.22M
    }
420
306k
  }
421
  // AFV coefficients in (even, even) positions.
422
76.5k
  HWY_ALIGN float coeff[4 * 4];
423
76.5k
  AFVDCT4x4(block, coeff);
424
382k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.53M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.22M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.22M
    }
428
306k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
76.5k
  ComputeScaledDCT<4, 4>()(
431
76.5k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
76.5k
              pixels_stride),
433
76.5k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
382k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.75M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.44M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.44M
    }
439
306k
  }
440
  // 4x8 DCT of the other half of the block.
441
76.5k
  ComputeScaledDCT<4, 8>()(
442
76.5k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
76.5k
      block, scratch_space);
444
382k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.75M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.44M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.44M
    }
448
306k
  }
449
76.5k
  float block00 = coefficients[0] * 0.25f;
450
76.5k
  float block01 = coefficients[1];
451
76.5k
  float block10 = coefficients[8];
452
76.5k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
76.5k
  coefficients[1] = (block00 - block01) * 0.5f;
454
76.5k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
76.5k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
12.3M
                            float* JXL_RESTRICT coefficients) {
411
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
12.3M
  size_t afv_x = afv_kind & 1;
413
12.3M
  size_t afv_y = afv_kind / 2;
414
12.3M
  HWY_ALIGN float block[4 * 8] = {};
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
198M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
198M
    }
420
49.5M
  }
421
  // AFV coefficients in (even, even) positions.
422
12.3M
  HWY_ALIGN float coeff[4 * 4];
423
12.3M
  AFVDCT4x4(block, coeff);
424
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
425
247M
    for (size_t ix = 0; ix < 4; ix++) {
426
198M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
198M
    }
428
49.5M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
12.3M
  ComputeScaledDCT<4, 4>()(
431
12.3M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
12.3M
              pixels_stride),
433
12.3M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
436
445M
    for (size_t ix = 0; ix < 8; ix++) {
437
396M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
396M
    }
439
49.5M
  }
440
  // 4x8 DCT of the other half of the block.
441
12.3M
  ComputeScaledDCT<4, 8>()(
442
12.3M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
12.3M
      block, scratch_space);
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
396M
    }
448
49.5M
  }
449
12.3M
  float block00 = coefficients[0] * 0.25f;
450
12.3M
  float block01 = coefficients[1];
451
12.3M
  float block10 = coefficients[8];
452
12.3M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
12.3M
  coefficients[1] = (block00 - block01) * 0.5f;
454
12.3M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
12.3M
                            float* JXL_RESTRICT coefficients) {
411
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
12.3M
  size_t afv_x = afv_kind & 1;
413
12.3M
  size_t afv_y = afv_kind / 2;
414
12.3M
  HWY_ALIGN float block[4 * 8] = {};
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
198M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
198M
    }
420
49.5M
  }
421
  // AFV coefficients in (even, even) positions.
422
12.3M
  HWY_ALIGN float coeff[4 * 4];
423
12.3M
  AFVDCT4x4(block, coeff);
424
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
425
247M
    for (size_t ix = 0; ix < 4; ix++) {
426
198M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
198M
    }
428
49.5M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
12.3M
  ComputeScaledDCT<4, 4>()(
431
12.3M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
12.3M
              pixels_stride),
433
12.3M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
436
445M
    for (size_t ix = 0; ix < 8; ix++) {
437
396M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
396M
    }
439
49.5M
  }
440
  // 4x8 DCT of the other half of the block.
441
12.3M
  ComputeScaledDCT<4, 8>()(
442
12.3M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
12.3M
      block, scratch_space);
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
396M
    }
448
49.5M
  }
449
12.3M
  float block00 = coefficients[0] * 0.25f;
450
12.3M
  float block01 = coefficients[1];
451
12.3M
  float block10 = coefficients[8];
452
12.3M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
12.3M
  coefficients[1] = (block00 - block01) * 0.5f;
454
12.3M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
12.3M
                            float* JXL_RESTRICT coefficients) {
411
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
12.3M
  size_t afv_x = afv_kind & 1;
413
12.3M
  size_t afv_y = afv_kind / 2;
414
12.3M
  HWY_ALIGN float block[4 * 8] = {};
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
198M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
198M
    }
420
49.5M
  }
421
  // AFV coefficients in (even, even) positions.
422
12.3M
  HWY_ALIGN float coeff[4 * 4];
423
12.3M
  AFVDCT4x4(block, coeff);
424
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
425
247M
    for (size_t ix = 0; ix < 4; ix++) {
426
198M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
198M
    }
428
49.5M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
12.3M
  ComputeScaledDCT<4, 4>()(
431
12.3M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
12.3M
              pixels_stride),
433
12.3M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
436
445M
    for (size_t ix = 0; ix < 8; ix++) {
437
396M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
396M
    }
439
49.5M
  }
440
  // 4x8 DCT of the other half of the block.
441
12.3M
  ComputeScaledDCT<4, 8>()(
442
12.3M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
12.3M
      block, scratch_space);
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
396M
    }
448
49.5M
  }
449
12.3M
  float block00 = coefficients[0] * 0.25f;
450
12.3M
  float block01 = coefficients[1];
451
12.3M
  float block10 = coefficients[8];
452
12.3M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
12.3M
  coefficients[1] = (block00 - block01) * 0.5f;
454
12.3M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
12.3M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
12.3M
                            float* JXL_RESTRICT coefficients) {
411
12.3M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
12.3M
  size_t afv_x = afv_kind & 1;
413
12.3M
  size_t afv_y = afv_kind / 2;
414
12.3M
  HWY_ALIGN float block[4 * 8] = {};
415
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
416
247M
    for (size_t ix = 0; ix < 4; ix++) {
417
198M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
198M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
198M
    }
420
49.5M
  }
421
  // AFV coefficients in (even, even) positions.
422
12.3M
  HWY_ALIGN float coeff[4 * 4];
423
12.3M
  AFVDCT4x4(block, coeff);
424
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
425
247M
    for (size_t ix = 0; ix < 4; ix++) {
426
198M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
198M
    }
428
49.5M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
12.3M
  ComputeScaledDCT<4, 4>()(
431
12.3M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
12.3M
              pixels_stride),
433
12.3M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
436
445M
    for (size_t ix = 0; ix < 8; ix++) {
437
396M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
396M
    }
439
49.5M
  }
440
  // 4x8 DCT of the other half of the block.
441
12.3M
  ComputeScaledDCT<4, 8>()(
442
12.3M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
12.3M
      block, scratch_space);
444
61.9M
  for (size_t iy = 0; iy < 4; iy++) {
445
445M
    for (size_t ix = 0; ix < 8; ix++) {
446
396M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
396M
    }
448
49.5M
  }
449
12.3M
  float block00 = coefficients[0] * 0.25f;
450
12.3M
  float block01 = coefficients[1];
451
12.3M
  float block10 = coefficients[8];
452
12.3M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
12.3M
  coefficients[1] = (block00 - block01) * 0.5f;
454
12.3M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
12.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
178M
                                          float* JXL_RESTRICT scratch_space) {
462
178M
  using Type = AcStrategyType;
463
178M
  switch (strategy) {
464
13.8M
    case Type::IDENTITY: {
465
41.4M
      for (size_t y = 0; y < 2; y++) {
466
82.9M
        for (size_t x = 0; x < 2; x++) {
467
55.3M
          float block_dc = 0;
468
276M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.10G
            for (size_t ix = 0; ix < 4; ix++) {
470
884M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
884M
            }
472
221M
          }
473
55.3M
          block_dc *= 1.0f / 16;
474
276M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.10G
            for (size_t ix = 0; ix < 4; ix++) {
476
884M
              if (ix == 1 && iy == 1) continue;
477
829M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
829M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
829M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
829M
            }
481
221M
          }
482
55.3M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
55.3M
          coefficients[y * 8 + x] = block_dc;
484
55.3M
        }
485
27.6M
      }
486
13.8M
      float block00 = coefficients[0];
487
13.8M
      float block01 = coefficients[1];
488
13.8M
      float block10 = coefficients[8];
489
13.8M
      float block11 = coefficients[9];
490
13.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
13.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
13.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
13.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
13.8M
      break;
495
0
    }
496
12.6M
    case Type::DCT8X4: {
497
38.0M
      for (size_t x = 0; x < 2; x++) {
498
25.3M
        HWY_ALIGN float block[4 * 8];
499
25.3M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
25.3M
                                 scratch_space);
501
126M
        for (size_t iy = 0; iy < 4; iy++) {
502
913M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
812M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
812M
          }
506
101M
        }
507
25.3M
      }
508
12.6M
      float block0 = coefficients[0];
509
12.6M
      float block1 = coefficients[8];
510
12.6M
      coefficients[0] = (block0 + block1) * 0.5f;
511
12.6M
      coefficients[8] = (block0 - block1) * 0.5f;
512
12.6M
      break;
513
0
    }
514
12.5M
    case Type::DCT4X8: {
515
37.5M
      for (size_t y = 0; y < 2; y++) {
516
25.0M
        HWY_ALIGN float block[4 * 8];
517
25.0M
        ComputeScaledDCT<4, 8>()(
518
25.0M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
25.0M
            scratch_space);
520
125M
        for (size_t iy = 0; iy < 4; iy++) {
521
902M
          for (size_t ix = 0; ix < 8; ix++) {
522
801M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
801M
          }
524
100M
        }
525
25.0M
      }
526
12.5M
      float block0 = coefficients[0];
527
12.5M
      float block1 = coefficients[8];
528
12.5M
      coefficients[0] = (block0 + block1) * 0.5f;
529
12.5M
      coefficients[8] = (block0 - block1) * 0.5f;
530
12.5M
      break;
531
0
    }
532
12.3M
    case Type::DCT4X4: {
533
37.1M
      for (size_t y = 0; y < 2; y++) {
534
74.3M
        for (size_t x = 0; x < 2; x++) {
535
49.5M
          HWY_ALIGN float block[4 * 4];
536
49.5M
          ComputeScaledDCT<4, 4>()(
537
49.5M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
49.5M
              block, scratch_space);
539
247M
          for (size_t iy = 0; iy < 4; iy++) {
540
991M
            for (size_t ix = 0; ix < 4; ix++) {
541
792M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
792M
            }
543
198M
          }
544
49.5M
        }
545
24.7M
      }
546
12.3M
      float block00 = coefficients[0];
547
12.3M
      float block01 = coefficients[1];
548
12.3M
      float block10 = coefficients[8];
549
12.3M
      float block11 = coefficients[9];
550
12.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
12.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
12.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
12.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
12.3M
      break;
555
0
    }
556
16.4M
    case Type::DCT2X2: {
557
16.4M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
16.4M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
16.4M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
16.4M
      break;
561
0
    }
562
5.36M
    case Type::DCT16X16: {
563
5.36M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.36M
                                 scratch_space);
565
5.36M
      break;
566
0
    }
567
10.3M
    case Type::DCT16X8: {
568
10.3M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
10.3M
                                scratch_space);
570
10.3M
      break;
571
0
    }
572
10.4M
    case Type::DCT8X16: {
573
10.4M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
10.4M
                                scratch_space);
575
10.4M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.07M
    case Type::DCT32X16: {
588
2.07M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.07M
                                 scratch_space);
590
2.07M
      break;
591
0
    }
592
2.05M
    case Type::DCT16X32: {
593
2.05M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.05M
                                 scratch_space);
595
2.05M
      break;
596
0
    }
597
1.18M
    case Type::DCT32X32: {
598
1.18M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.18M
                                 scratch_space);
600
1.18M
      break;
601
0
    }
602
27.8M
    case Type::DCT: {
603
27.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
27.8M
                               scratch_space);
605
27.8M
      break;
606
0
    }
607
12.5M
    case Type::AFV0: {
608
12.5M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
12.5M
      break;
610
0
    }
611
12.5M
    case Type::AFV1: {
612
12.5M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
12.5M
      break;
614
0
    }
615
12.5M
    case Type::AFV2: {
616
12.5M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
12.5M
      break;
618
0
    }
619
12.5M
    case Type::AFV3: {
620
12.5M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
12.5M
      break;
622
0
    }
623
268k
    case Type::DCT64X64: {
624
268k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
268k
                                 scratch_space);
626
268k
      break;
627
0
    }
628
623k
    case Type::DCT64X32: {
629
623k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
623k
                                 scratch_space);
631
623k
      break;
632
0
    }
633
382k
    case Type::DCT32X64: {
634
382k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
382k
                                 scratch_space);
636
382k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
178M
  }
669
178M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
5.71M
                                          float* JXL_RESTRICT scratch_space) {
462
5.71M
  using Type = AcStrategyType;
463
5.71M
  switch (strategy) {
464
719k
    case Type::IDENTITY: {
465
2.15M
      for (size_t y = 0; y < 2; y++) {
466
4.31M
        for (size_t x = 0; x < 2; x++) {
467
2.87M
          float block_dc = 0;
468
14.3M
          for (size_t iy = 0; iy < 4; iy++) {
469
57.5M
            for (size_t ix = 0; ix < 4; ix++) {
470
46.0M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
46.0M
            }
472
11.5M
          }
473
2.87M
          block_dc *= 1.0f / 16;
474
14.3M
          for (size_t iy = 0; iy < 4; iy++) {
475
57.5M
            for (size_t ix = 0; ix < 4; ix++) {
476
46.0M
              if (ix == 1 && iy == 1) continue;
477
43.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
43.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
43.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
43.1M
            }
481
11.5M
          }
482
2.87M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.87M
          coefficients[y * 8 + x] = block_dc;
484
2.87M
        }
485
1.43M
      }
486
719k
      float block00 = coefficients[0];
487
719k
      float block01 = coefficients[1];
488
719k
      float block10 = coefficients[8];
489
719k
      float block11 = coefficients[9];
490
719k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
719k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
719k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
719k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
719k
      break;
495
0
    }
496
152k
    case Type::DCT8X4: {
497
457k
      for (size_t x = 0; x < 2; x++) {
498
305k
        HWY_ALIGN float block[4 * 8];
499
305k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
305k
                                 scratch_space);
501
1.52M
        for (size_t iy = 0; iy < 4; iy++) {
502
10.9M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
9.76M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
9.76M
          }
506
1.22M
        }
507
305k
      }
508
152k
      float block0 = coefficients[0];
509
152k
      float block1 = coefficients[8];
510
152k
      coefficients[0] = (block0 + block1) * 0.5f;
511
152k
      coefficients[8] = (block0 - block1) * 0.5f;
512
152k
      break;
513
0
    }
514
70.3k
    case Type::DCT4X8: {
515
210k
      for (size_t y = 0; y < 2; y++) {
516
140k
        HWY_ALIGN float block[4 * 8];
517
140k
        ComputeScaledDCT<4, 8>()(
518
140k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
140k
            scratch_space);
520
703k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.49M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.49M
          }
524
562k
        }
525
140k
      }
526
70.3k
      float block0 = coefficients[0];
527
70.3k
      float block1 = coefficients[8];
528
70.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
70.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
70.3k
      break;
531
0
    }
532
759
    case Type::DCT4X4: {
533
2.27k
      for (size_t y = 0; y < 2; y++) {
534
4.55k
        for (size_t x = 0; x < 2; x++) {
535
3.03k
          HWY_ALIGN float block[4 * 4];
536
3.03k
          ComputeScaledDCT<4, 4>()(
537
3.03k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
3.03k
              block, scratch_space);
539
15.1k
          for (size_t iy = 0; iy < 4; iy++) {
540
60.7k
            for (size_t ix = 0; ix < 4; ix++) {
541
48.5k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
48.5k
            }
543
12.1k
          }
544
3.03k
        }
545
1.51k
      }
546
759
      float block00 = coefficients[0];
547
759
      float block01 = coefficients[1];
548
759
      float block10 = coefficients[8];
549
759
      float block11 = coefficients[9];
550
759
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
759
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
759
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
759
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
759
      break;
555
0
    }
556
2.04M
    case Type::DCT2X2: {
557
2.04M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.04M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.04M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.04M
      break;
561
0
    }
562
166k
    case Type::DCT16X16: {
563
166k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
166k
                                 scratch_space);
565
166k
      break;
566
0
    }
567
227k
    case Type::DCT16X8: {
568
227k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
227k
                                scratch_space);
570
227k
      break;
571
0
    }
572
244k
    case Type::DCT8X16: {
573
244k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
244k
                                scratch_space);
575
244k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
51.3k
    case Type::DCT32X16: {
588
51.3k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
51.3k
                                 scratch_space);
590
51.3k
      break;
591
0
    }
592
52.6k
    case Type::DCT16X32: {
593
52.6k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
52.6k
                                 scratch_space);
595
52.6k
      break;
596
0
    }
597
90.3k
    case Type::DCT32X32: {
598
90.3k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
90.3k
                                 scratch_space);
600
90.3k
      break;
601
0
    }
602
1.51M
    case Type::DCT: {
603
1.51M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.51M
                               scratch_space);
605
1.51M
      break;
606
0
    }
607
104k
    case Type::AFV0: {
608
104k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
104k
      break;
610
0
    }
611
57.4k
    case Type::AFV1: {
612
57.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
57.4k
      break;
614
0
    }
615
72.9k
    case Type::AFV2: {
616
72.9k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
72.9k
      break;
618
0
    }
619
76.5k
    case Type::AFV3: {
620
76.5k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
76.5k
      break;
622
0
    }
623
48.5k
    case Type::DCT64X64: {
624
48.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
48.5k
                                 scratch_space);
626
48.5k
      break;
627
0
    }
628
11.8k
    case Type::DCT64X32: {
629
11.8k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
11.8k
                                 scratch_space);
631
11.8k
      break;
632
0
    }
633
6.19k
    case Type::DCT32X64: {
634
6.19k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.19k
                                 scratch_space);
636
6.19k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
5.71M
  }
669
5.71M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
18.1M
                                          float* JXL_RESTRICT scratch_space) {
462
18.1M
  using Type = AcStrategyType;
463
18.1M
  switch (strategy) {
464
719k
    case Type::IDENTITY: {
465
2.15M
      for (size_t y = 0; y < 2; y++) {
466
4.31M
        for (size_t x = 0; x < 2; x++) {
467
2.87M
          float block_dc = 0;
468
14.3M
          for (size_t iy = 0; iy < 4; iy++) {
469
57.5M
            for (size_t ix = 0; ix < 4; ix++) {
470
46.0M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
46.0M
            }
472
11.5M
          }
473
2.87M
          block_dc *= 1.0f / 16;
474
14.3M
          for (size_t iy = 0; iy < 4; iy++) {
475
57.5M
            for (size_t ix = 0; ix < 4; ix++) {
476
46.0M
              if (ix == 1 && iy == 1) continue;
477
43.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
43.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
43.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
43.1M
            }
481
11.5M
          }
482
2.87M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.87M
          coefficients[y * 8 + x] = block_dc;
484
2.87M
        }
485
1.43M
      }
486
719k
      float block00 = coefficients[0];
487
719k
      float block01 = coefficients[1];
488
719k
      float block10 = coefficients[8];
489
719k
      float block11 = coefficients[9];
490
719k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
719k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
719k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
719k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
719k
      break;
495
0
    }
496
152k
    case Type::DCT8X4: {
497
457k
      for (size_t x = 0; x < 2; x++) {
498
305k
        HWY_ALIGN float block[4 * 8];
499
305k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
305k
                                 scratch_space);
501
1.52M
        for (size_t iy = 0; iy < 4; iy++) {
502
10.9M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
9.76M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
9.76M
          }
506
1.22M
        }
507
305k
      }
508
152k
      float block0 = coefficients[0];
509
152k
      float block1 = coefficients[8];
510
152k
      coefficients[0] = (block0 + block1) * 0.5f;
511
152k
      coefficients[8] = (block0 - block1) * 0.5f;
512
152k
      break;
513
0
    }
514
70.3k
    case Type::DCT4X8: {
515
210k
      for (size_t y = 0; y < 2; y++) {
516
140k
        HWY_ALIGN float block[4 * 8];
517
140k
        ComputeScaledDCT<4, 8>()(
518
140k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
140k
            scratch_space);
520
703k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.49M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.49M
          }
524
562k
        }
525
140k
      }
526
70.3k
      float block0 = coefficients[0];
527
70.3k
      float block1 = coefficients[8];
528
70.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
70.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
70.3k
      break;
531
0
    }
532
759
    case Type::DCT4X4: {
533
2.27k
      for (size_t y = 0; y < 2; y++) {
534
4.55k
        for (size_t x = 0; x < 2; x++) {
535
3.03k
          HWY_ALIGN float block[4 * 4];
536
3.03k
          ComputeScaledDCT<4, 4>()(
537
3.03k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
3.03k
              block, scratch_space);
539
15.1k
          for (size_t iy = 0; iy < 4; iy++) {
540
60.7k
            for (size_t ix = 0; ix < 4; ix++) {
541
48.5k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
48.5k
            }
543
12.1k
          }
544
3.03k
        }
545
1.51k
      }
546
759
      float block00 = coefficients[0];
547
759
      float block01 = coefficients[1];
548
759
      float block10 = coefficients[8];
549
759
      float block11 = coefficients[9];
550
759
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
759
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
759
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
759
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
759
      break;
555
0
    }
556
2.04M
    case Type::DCT2X2: {
557
2.04M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.04M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.04M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.04M
      break;
561
0
    }
562
166k
    case Type::DCT16X16: {
563
166k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
166k
                                 scratch_space);
565
166k
      break;
566
0
    }
567
227k
    case Type::DCT16X8: {
568
227k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
227k
                                scratch_space);
570
227k
      break;
571
0
    }
572
244k
    case Type::DCT8X16: {
573
244k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
244k
                                scratch_space);
575
244k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
51.3k
    case Type::DCT32X16: {
588
51.3k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
51.3k
                                 scratch_space);
590
51.3k
      break;
591
0
    }
592
52.6k
    case Type::DCT16X32: {
593
52.6k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
52.6k
                                 scratch_space);
595
52.6k
      break;
596
0
    }
597
90.3k
    case Type::DCT32X32: {
598
90.3k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
90.3k
                                 scratch_space);
600
90.3k
      break;
601
0
    }
602
13.9M
    case Type::DCT: {
603
13.9M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
13.9M
                               scratch_space);
605
13.9M
      break;
606
0
    }
607
104k
    case Type::AFV0: {
608
104k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
104k
      break;
610
0
    }
611
57.4k
    case Type::AFV1: {
612
57.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
57.4k
      break;
614
0
    }
615
72.9k
    case Type::AFV2: {
616
72.9k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
72.9k
      break;
618
0
    }
619
76.5k
    case Type::AFV3: {
620
76.5k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
76.5k
      break;
622
0
    }
623
48.5k
    case Type::DCT64X64: {
624
48.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
48.5k
                                 scratch_space);
626
48.5k
      break;
627
0
    }
628
11.8k
    case Type::DCT64X32: {
629
11.8k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
11.8k
                                 scratch_space);
631
11.8k
      break;
632
0
    }
633
6.19k
    case Type::DCT32X64: {
634
6.19k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.19k
                                 scratch_space);
636
6.19k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
18.1M
  }
669
18.1M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
154M
                                          float* JXL_RESTRICT scratch_space) {
462
154M
  using Type = AcStrategyType;
463
154M
  switch (strategy) {
464
12.3M
    case Type::IDENTITY: {
465
37.1M
      for (size_t y = 0; y < 2; y++) {
466
74.3M
        for (size_t x = 0; x < 2; x++) {
467
49.5M
          float block_dc = 0;
468
247M
          for (size_t iy = 0; iy < 4; iy++) {
469
991M
            for (size_t ix = 0; ix < 4; ix++) {
470
792M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
792M
            }
472
198M
          }
473
49.5M
          block_dc *= 1.0f / 16;
474
247M
          for (size_t iy = 0; iy < 4; iy++) {
475
991M
            for (size_t ix = 0; ix < 4; ix++) {
476
792M
              if (ix == 1 && iy == 1) continue;
477
743M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
743M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
743M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
743M
            }
481
198M
          }
482
49.5M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
49.5M
          coefficients[y * 8 + x] = block_dc;
484
49.5M
        }
485
24.7M
      }
486
12.3M
      float block00 = coefficients[0];
487
12.3M
      float block01 = coefficients[1];
488
12.3M
      float block10 = coefficients[8];
489
12.3M
      float block11 = coefficients[9];
490
12.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
12.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
12.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
12.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
12.3M
      break;
495
0
    }
496
12.3M
    case Type::DCT8X4: {
497
37.1M
      for (size_t x = 0; x < 2; x++) {
498
24.7M
        HWY_ALIGN float block[4 * 8];
499
24.7M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
24.7M
                                 scratch_space);
501
123M
        for (size_t iy = 0; iy < 4; iy++) {
502
891M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
792M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
792M
          }
506
99.1M
        }
507
24.7M
      }
508
12.3M
      float block0 = coefficients[0];
509
12.3M
      float block1 = coefficients[8];
510
12.3M
      coefficients[0] = (block0 + block1) * 0.5f;
511
12.3M
      coefficients[8] = (block0 - block1) * 0.5f;
512
12.3M
      break;
513
0
    }
514
12.3M
    case Type::DCT4X8: {
515
37.1M
      for (size_t y = 0; y < 2; y++) {
516
24.7M
        HWY_ALIGN float block[4 * 8];
517
24.7M
        ComputeScaledDCT<4, 8>()(
518
24.7M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
24.7M
            scratch_space);
520
123M
        for (size_t iy = 0; iy < 4; iy++) {
521
891M
          for (size_t ix = 0; ix < 8; ix++) {
522
792M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
792M
          }
524
99.1M
        }
525
24.7M
      }
526
12.3M
      float block0 = coefficients[0];
527
12.3M
      float block1 = coefficients[8];
528
12.3M
      coefficients[0] = (block0 + block1) * 0.5f;
529
12.3M
      coefficients[8] = (block0 - block1) * 0.5f;
530
12.3M
      break;
531
0
    }
532
12.3M
    case Type::DCT4X4: {
533
37.1M
      for (size_t y = 0; y < 2; y++) {
534
74.3M
        for (size_t x = 0; x < 2; x++) {
535
49.5M
          HWY_ALIGN float block[4 * 4];
536
49.5M
          ComputeScaledDCT<4, 4>()(
537
49.5M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
49.5M
              block, scratch_space);
539
247M
          for (size_t iy = 0; iy < 4; iy++) {
540
991M
            for (size_t ix = 0; ix < 4; ix++) {
541
792M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
792M
            }
543
198M
          }
544
49.5M
        }
545
24.7M
      }
546
12.3M
      float block00 = coefficients[0];
547
12.3M
      float block01 = coefficients[1];
548
12.3M
      float block10 = coefficients[8];
549
12.3M
      float block11 = coefficients[9];
550
12.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
12.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
12.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
12.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
12.3M
      break;
555
0
    }
556
12.3M
    case Type::DCT2X2: {
557
12.3M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
12.3M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
12.3M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
12.3M
      break;
561
0
    }
562
5.03M
    case Type::DCT16X16: {
563
5.03M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.03M
                                 scratch_space);
565
5.03M
      break;
566
0
    }
567
9.94M
    case Type::DCT16X8: {
568
9.94M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
9.94M
                                scratch_space);
570
9.94M
      break;
571
0
    }
572
9.92M
    case Type::DCT8X16: {
573
9.92M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
9.92M
                                scratch_space);
575
9.92M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.97M
    case Type::DCT32X16: {
588
1.97M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.97M
                                 scratch_space);
590
1.97M
      break;
591
0
    }
592
1.95M
    case Type::DCT16X32: {
593
1.95M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.95M
                                 scratch_space);
595
1.95M
      break;
596
0
    }
597
999k
    case Type::DCT32X32: {
598
999k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
999k
                                 scratch_space);
600
999k
      break;
601
0
    }
602
12.3M
    case Type::DCT: {
603
12.3M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
12.3M
                               scratch_space);
605
12.3M
      break;
606
0
    }
607
12.3M
    case Type::AFV0: {
608
12.3M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
12.3M
      break;
610
0
    }
611
12.3M
    case Type::AFV1: {
612
12.3M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
12.3M
      break;
614
0
    }
615
12.3M
    case Type::AFV2: {
616
12.3M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
12.3M
      break;
618
0
    }
619
12.3M
    case Type::AFV3: {
620
12.3M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
12.3M
      break;
622
0
    }
623
171k
    case Type::DCT64X64: {
624
171k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
171k
                                 scratch_space);
626
171k
      break;
627
0
    }
628
599k
    case Type::DCT64X32: {
629
599k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
599k
                                 scratch_space);
631
599k
      break;
632
0
    }
633
370k
    case Type::DCT32X64: {
634
370k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
370k
                                 scratch_space);
636
370k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
154M
  }
669
154M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
23.8M
                                              float* scratch_space) {
676
23.8M
  using Type = AcStrategyType;
677
23.8M
  switch (strategy) {
678
455k
    case Type::DCT16X8: {
679
455k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
455k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
455k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
455k
      break;
683
0
    }
684
489k
    case Type::DCT8X16: {
685
489k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
489k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
489k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
489k
      break;
689
0
    }
690
332k
    case Type::DCT16X16: {
691
332k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
332k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
332k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
332k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
102k
    case Type::DCT32X16: {
709
102k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
102k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
102k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
102k
      break;
713
0
    }
714
105k
    case Type::DCT16X32: {
715
105k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
105k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
105k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
105k
      break;
719
0
    }
720
180k
    case Type::DCT32X32: {
721
180k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
180k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
180k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
180k
      break;
725
0
    }
726
23.6k
    case Type::DCT64X32: {
727
23.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
23.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
23.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
23.6k
      break;
731
0
    }
732
12.3k
    case Type::DCT32X64: {
733
12.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
12.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
12.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
12.3k
      break;
737
0
    }
738
97.0k
    case Type::DCT64X64: {
739
97.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
97.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
97.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
97.0k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
15.4M
    case Type::DCT:
787
19.5M
    case Type::DCT2X2:
788
19.5M
    case Type::DCT4X4:
789
19.6M
    case Type::DCT4X8:
790
19.9M
    case Type::DCT8X4:
791
20.1M
    case Type::AFV0:
792
20.2M
    case Type::AFV1:
793
20.4M
    case Type::AFV2:
794
20.5M
    case Type::AFV3:
795
22.0M
    case Type::IDENTITY:
796
22.0M
      dc[0] = block[0];
797
22.0M
      break;
798
23.8M
  }
799
23.8M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
5.71M
                                              float* scratch_space) {
676
5.71M
  using Type = AcStrategyType;
677
5.71M
  switch (strategy) {
678
227k
    case Type::DCT16X8: {
679
227k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
227k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
227k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
227k
      break;
683
0
    }
684
244k
    case Type::DCT8X16: {
685
244k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
244k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
244k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
244k
      break;
689
0
    }
690
166k
    case Type::DCT16X16: {
691
166k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
166k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
166k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
166k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
51.3k
    case Type::DCT32X16: {
709
51.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
51.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
51.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
51.3k
      break;
713
0
    }
714
52.6k
    case Type::DCT16X32: {
715
52.6k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
52.6k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
52.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
52.6k
      break;
719
0
    }
720
90.3k
    case Type::DCT32X32: {
721
90.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
90.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
90.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
90.3k
      break;
725
0
    }
726
11.8k
    case Type::DCT64X32: {
727
11.8k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
11.8k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
11.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
11.8k
      break;
731
0
    }
732
6.19k
    case Type::DCT32X64: {
733
6.19k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.19k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.19k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.19k
      break;
737
0
    }
738
48.5k
    case Type::DCT64X64: {
739
48.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
48.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
48.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
48.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.51M
    case Type::DCT:
787
3.56M
    case Type::DCT2X2:
788
3.56M
    case Type::DCT4X4:
789
3.63M
    case Type::DCT4X8:
790
3.78M
    case Type::DCT8X4:
791
3.89M
    case Type::AFV0:
792
3.94M
    case Type::AFV1:
793
4.02M
    case Type::AFV2:
794
4.09M
    case Type::AFV3:
795
4.81M
    case Type::IDENTITY:
796
4.81M
      dc[0] = block[0];
797
4.81M
      break;
798
5.71M
  }
799
5.71M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
18.1M
                                              float* scratch_space) {
676
18.1M
  using Type = AcStrategyType;
677
18.1M
  switch (strategy) {
678
227k
    case Type::DCT16X8: {
679
227k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
227k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
227k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
227k
      break;
683
0
    }
684
244k
    case Type::DCT8X16: {
685
244k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
244k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
244k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
244k
      break;
689
0
    }
690
166k
    case Type::DCT16X16: {
691
166k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
166k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
166k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
166k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
51.3k
    case Type::DCT32X16: {
709
51.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
51.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
51.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
51.3k
      break;
713
0
    }
714
52.6k
    case Type::DCT16X32: {
715
52.6k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
52.6k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
52.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
52.6k
      break;
719
0
    }
720
90.3k
    case Type::DCT32X32: {
721
90.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
90.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
90.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
90.3k
      break;
725
0
    }
726
11.8k
    case Type::DCT64X32: {
727
11.8k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
11.8k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
11.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
11.8k
      break;
731
0
    }
732
6.19k
    case Type::DCT32X64: {
733
6.19k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.19k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.19k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.19k
      break;
737
0
    }
738
48.5k
    case Type::DCT64X64: {
739
48.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
48.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
48.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
48.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
13.9M
    case Type::DCT:
787
15.9M
    case Type::DCT2X2:
788
15.9M
    case Type::DCT4X4:
789
16.0M
    case Type::DCT4X8:
790
16.1M
    case Type::DCT8X4:
791
16.2M
    case Type::AFV0:
792
16.3M
    case Type::AFV1:
793
16.4M
    case Type::AFV2:
794
16.4M
    case Type::AFV3:
795
17.2M
    case Type::IDENTITY:
796
17.2M
      dc[0] = block[0];
797
17.2M
      break;
798
18.1M
  }
799
18.1M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_