Coverage Report

Created: 2025-11-14 07:32

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.01M
                                   const size_t output_stride, float* scratch) {
40
2.01M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.01M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.01M
  float* block = scratch;
43
2.01M
  if (ROWS < COLS) {
44
1.51M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.27M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.44M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.44M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.44M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.44M
      }
50
835k
    }
51
1.33M
  } else {
52
4.61M
    for (size_t y = 0; y < LF_COLS; y++) {
53
17.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
14.6M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
14.6M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
14.6M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
14.6M
      }
58
3.27M
    }
59
1.33M
  }
60
61
2.01M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.01M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.01M
                                  scratch_space);
64
2.01M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
257k
                                   const size_t output_stride, float* scratch) {
40
257k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
257k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
257k
  float* block = scratch;
43
257k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
257k
  } else {
52
515k
    for (size_t y = 0; y < LF_COLS; y++) {
53
773k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
515k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
515k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
515k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
515k
      }
58
257k
    }
59
257k
  }
60
61
257k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
257k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
257k
                                  scratch_space);
64
257k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
276k
                                   const size_t output_stride, float* scratch) {
40
276k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
276k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
276k
  float* block = scratch;
43
276k
  if (ROWS < COLS) {
44
552k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
829k
      for (size_t x = 0; x < LF_COLS; x++) {
46
552k
        block[y * COLS + x] = input[y * input_stride + x] *
47
552k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
552k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
552k
      }
50
276k
    }
51
276k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
276k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
276k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
276k
                                  scratch_space);
64
276k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
181k
                                   const size_t output_stride, float* scratch) {
40
181k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
181k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
181k
  float* block = scratch;
43
181k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
181k
  } else {
52
545k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.09M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
727k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
727k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
727k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
727k
      }
58
363k
    }
59
181k
  }
60
61
181k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
181k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
181k
                                  scratch_space);
64
181k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
57.7k
  } else {
52
173k
    for (size_t y = 0; y < LF_COLS; y++) {
53
577k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
462k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
462k
      }
58
115k
    }
59
57.7k
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
173k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
577k
      for (size_t x = 0; x < LF_COLS; x++) {
46
462k
        block[y * COLS + x] = input[y * input_stride + x] *
47
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
462k
      }
50
115k
    }
51
57.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
99.5k
                                   const size_t output_stride, float* scratch) {
40
99.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
99.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
99.5k
  float* block = scratch;
43
99.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
99.5k
  } else {
52
497k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.99M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.59M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.59M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.59M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.59M
      }
58
398k
    }
59
99.5k
  }
60
61
99.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
99.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
99.5k
                                  scratch_space);
64
99.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
16.2k
                                   const size_t output_stride, float* scratch) {
40
16.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
16.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
16.2k
  float* block = scratch;
43
16.2k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
16.2k
  } else {
52
81.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
584k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
519k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
519k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
519k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
519k
      }
58
64.8k
    }
59
16.2k
  }
60
61
16.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
16.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
16.2k
                                  scratch_space);
64
16.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.41k
                                   const size_t output_stride, float* scratch) {
40
6.41k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.41k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.41k
  float* block = scratch;
43
6.41k
  if (ROWS < COLS) {
44
32.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
230k
      for (size_t x = 0; x < LF_COLS; x++) {
46
205k
        block[y * COLS + x] = input[y * input_stride + x] *
47
205k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
205k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
205k
      }
50
25.6k
    }
51
6.41k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.41k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.41k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.41k
                                  scratch_space);
64
6.41k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
54.5k
                                   const size_t output_stride, float* scratch) {
40
54.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
54.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
54.5k
  float* block = scratch;
43
54.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
54.5k
  } else {
52
491k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.93M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.49M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.49M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.49M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.49M
      }
58
436k
    }
59
54.5k
  }
60
61
54.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
54.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
54.5k
                                  scratch_space);
64
54.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
257k
                                   const size_t output_stride, float* scratch) {
40
257k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
257k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
257k
  float* block = scratch;
43
257k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
257k
  } else {
52
515k
    for (size_t y = 0; y < LF_COLS; y++) {
53
773k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
515k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
515k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
515k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
515k
      }
58
257k
    }
59
257k
  }
60
61
257k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
257k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
257k
                                  scratch_space);
64
257k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
276k
                                   const size_t output_stride, float* scratch) {
40
276k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
276k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
276k
  float* block = scratch;
43
276k
  if (ROWS < COLS) {
44
552k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
829k
      for (size_t x = 0; x < LF_COLS; x++) {
46
552k
        block[y * COLS + x] = input[y * input_stride + x] *
47
552k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
552k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
552k
      }
50
276k
    }
51
276k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
276k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
276k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
276k
                                  scratch_space);
64
276k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
181k
                                   const size_t output_stride, float* scratch) {
40
181k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
181k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
181k
  float* block = scratch;
43
181k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
181k
  } else {
52
545k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.09M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
727k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
727k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
727k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
727k
      }
58
363k
    }
59
181k
  }
60
61
181k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
181k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
181k
                                  scratch_space);
64
181k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
57.7k
  } else {
52
173k
    for (size_t y = 0; y < LF_COLS; y++) {
53
577k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
462k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
462k
      }
58
115k
    }
59
57.7k
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
57.7k
                                   const size_t output_stride, float* scratch) {
40
57.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
57.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
57.7k
  float* block = scratch;
43
57.7k
  if (ROWS < COLS) {
44
173k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
577k
      for (size_t x = 0; x < LF_COLS; x++) {
46
462k
        block[y * COLS + x] = input[y * input_stride + x] *
47
462k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
462k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
462k
      }
50
115k
    }
51
57.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
57.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
57.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
57.7k
                                  scratch_space);
64
57.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
99.5k
                                   const size_t output_stride, float* scratch) {
40
99.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
99.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
99.5k
  float* block = scratch;
43
99.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
99.5k
  } else {
52
497k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.99M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.59M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.59M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.59M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.59M
      }
58
398k
    }
59
99.5k
  }
60
61
99.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
99.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
99.5k
                                  scratch_space);
64
99.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
16.2k
                                   const size_t output_stride, float* scratch) {
40
16.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
16.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
16.2k
  float* block = scratch;
43
16.2k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
16.2k
  } else {
52
81.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
584k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
519k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
519k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
519k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
519k
      }
58
64.8k
    }
59
16.2k
  }
60
61
16.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
16.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
16.2k
                                  scratch_space);
64
16.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.41k
                                   const size_t output_stride, float* scratch) {
40
6.41k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.41k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.41k
  float* block = scratch;
43
6.41k
  if (ROWS < COLS) {
44
32.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
230k
      for (size_t x = 0; x < LF_COLS; x++) {
46
205k
        block[y * COLS + x] = input[y * input_stride + x] *
47
205k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
205k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
205k
      }
50
25.6k
    }
51
6.41k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.41k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.41k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.41k
                                  scratch_space);
64
6.41k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
54.5k
                                   const size_t output_stride, float* scratch) {
40
54.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
54.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
54.5k
  float* block = scratch;
43
54.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
54.5k
  } else {
52
491k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.93M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.49M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.49M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.49M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.49M
      }
58
436k
    }
59
54.5k
  }
60
61
54.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
54.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
54.5k
                                  scratch_space);
64
54.5k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
55.3M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
55.3M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
55.3M
  static_assert(S % 2 == 0, "S should be even");
70
55.3M
  float temp[kDCTBlockSize];
71
55.3M
  constexpr size_t num_2x2 = S / 2;
72
184M
  for (size_t y = 0; y < num_2x2; y++) {
73
516M
    for (size_t x = 0; x < num_2x2; x++) {
74
387M
      float c00 = block[y * 2 * stride + x * 2];
75
387M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
387M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
387M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
387M
      float r00 = c00 + c01 + c10 + c11;
79
387M
      float r01 = c00 + c01 - c10 - c11;
80
387M
      float r10 = c00 - c01 + c10 - c11;
81
387M
      float r11 = c00 - c01 - c10 + c11;
82
387M
      r00 *= 0.25f;
83
387M
      r01 *= 0.25f;
84
387M
      r10 *= 0.25f;
85
387M
      r11 *= 0.25f;
86
387M
      temp[y * kBlockDim + x] = r00;
87
387M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
387M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
387M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
387M
    }
91
129M
  }
92
313M
  for (size_t y = 0; y < S; y++) {
93
1.80G
    for (size_t x = 0; x < S; x++) {
94
1.54G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.54G
    }
96
258M
  }
97
55.3M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
11.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
45.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
36.0M
      float c00 = block[y * 2 * stride + x * 2];
75
36.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
36.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
36.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
36.0M
      float r00 = c00 + c01 + c10 + c11;
79
36.0M
      float r01 = c00 + c01 - c10 - c11;
80
36.0M
      float r10 = c00 - c01 + c10 - c11;
81
36.0M
      float r11 = c00 - c01 - c10 + c11;
82
36.0M
      r00 *= 0.25f;
83
36.0M
      r01 *= 0.25f;
84
36.0M
      r10 *= 0.25f;
85
36.0M
      r11 *= 0.25f;
86
36.0M
      temp[y * kBlockDim + x] = r00;
87
36.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
36.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
36.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
36.0M
    }
91
9.00M
  }
92
20.2M
  for (size_t y = 0; y < S; y++) {
93
162M
    for (size_t x = 0; x < S; x++) {
94
144M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
144M
    }
96
18.0M
  }
97
2.25M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
6.75M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.00M
      float c00 = block[y * 2 * stride + x * 2];
75
9.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
9.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
9.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
9.00M
      float r00 = c00 + c01 + c10 + c11;
79
9.00M
      float r01 = c00 + c01 - c10 - c11;
80
9.00M
      float r10 = c00 - c01 + c10 - c11;
81
9.00M
      float r11 = c00 - c01 - c10 + c11;
82
9.00M
      r00 *= 0.25f;
83
9.00M
      r01 *= 0.25f;
84
9.00M
      r10 *= 0.25f;
85
9.00M
      r11 *= 0.25f;
86
9.00M
      temp[y * kBlockDim + x] = r00;
87
9.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
9.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
9.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
9.00M
    }
91
4.50M
  }
92
11.2M
  for (size_t y = 0; y < S; y++) {
93
45.0M
    for (size_t x = 0; x < S; x++) {
94
36.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
36.0M
    }
96
9.00M
  }
97
2.25M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
4.50M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.50M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.25M
      float c00 = block[y * 2 * stride + x * 2];
75
2.25M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.25M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.25M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.25M
      float r00 = c00 + c01 + c10 + c11;
79
2.25M
      float r01 = c00 + c01 - c10 - c11;
80
2.25M
      float r10 = c00 - c01 + c10 - c11;
81
2.25M
      float r11 = c00 - c01 - c10 + c11;
82
2.25M
      r00 *= 0.25f;
83
2.25M
      r01 *= 0.25f;
84
2.25M
      r10 *= 0.25f;
85
2.25M
      r11 *= 0.25f;
86
2.25M
      temp[y * kBlockDim + x] = r00;
87
2.25M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.25M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.25M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.25M
    }
91
2.25M
  }
92
6.75M
  for (size_t y = 0; y < S; y++) {
93
13.5M
    for (size_t x = 0; x < S; x++) {
94
9.00M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
9.00M
    }
96
4.50M
  }
97
2.25M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
11.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
45.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
36.0M
      float c00 = block[y * 2 * stride + x * 2];
75
36.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
36.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
36.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
36.0M
      float r00 = c00 + c01 + c10 + c11;
79
36.0M
      float r01 = c00 + c01 - c10 - c11;
80
36.0M
      float r10 = c00 - c01 + c10 - c11;
81
36.0M
      float r11 = c00 - c01 - c10 + c11;
82
36.0M
      r00 *= 0.25f;
83
36.0M
      r01 *= 0.25f;
84
36.0M
      r10 *= 0.25f;
85
36.0M
      r11 *= 0.25f;
86
36.0M
      temp[y * kBlockDim + x] = r00;
87
36.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
36.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
36.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
36.0M
    }
91
9.00M
  }
92
20.2M
  for (size_t y = 0; y < S; y++) {
93
162M
    for (size_t x = 0; x < S; x++) {
94
144M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
144M
    }
96
18.0M
  }
97
2.25M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
6.75M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.00M
      float c00 = block[y * 2 * stride + x * 2];
75
9.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
9.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
9.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
9.00M
      float r00 = c00 + c01 + c10 + c11;
79
9.00M
      float r01 = c00 + c01 - c10 - c11;
80
9.00M
      float r10 = c00 - c01 + c10 - c11;
81
9.00M
      float r11 = c00 - c01 - c10 + c11;
82
9.00M
      r00 *= 0.25f;
83
9.00M
      r01 *= 0.25f;
84
9.00M
      r10 *= 0.25f;
85
9.00M
      r11 *= 0.25f;
86
9.00M
      temp[y * kBlockDim + x] = r00;
87
9.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
9.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
9.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
9.00M
    }
91
4.50M
  }
92
11.2M
  for (size_t y = 0; y < S; y++) {
93
45.0M
    for (size_t x = 0; x < S; x++) {
94
36.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
36.0M
    }
96
9.00M
  }
97
2.25M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.25M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.25M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.25M
  static_assert(S % 2 == 0, "S should be even");
70
2.25M
  float temp[kDCTBlockSize];
71
2.25M
  constexpr size_t num_2x2 = S / 2;
72
4.50M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.50M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.25M
      float c00 = block[y * 2 * stride + x * 2];
75
2.25M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.25M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.25M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.25M
      float r00 = c00 + c01 + c10 + c11;
79
2.25M
      float r01 = c00 + c01 - c10 - c11;
80
2.25M
      float r10 = c00 - c01 + c10 - c11;
81
2.25M
      float r11 = c00 - c01 - c10 + c11;
82
2.25M
      r00 *= 0.25f;
83
2.25M
      r01 *= 0.25f;
84
2.25M
      r10 *= 0.25f;
85
2.25M
      r11 *= 0.25f;
86
2.25M
      temp[y * kBlockDim + x] = r00;
87
2.25M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.25M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.25M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.25M
    }
91
2.25M
  }
92
6.75M
  for (size_t y = 0; y < S; y++) {
93
13.5M
    for (size_t x = 0; x < S; x++) {
94
9.00M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
9.00M
    }
96
4.50M
  }
97
2.25M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
69.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
278M
    for (size_t x = 0; x < num_2x2; x++) {
74
223M
      float c00 = block[y * 2 * stride + x * 2];
75
223M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
223M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
223M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
223M
      float r00 = c00 + c01 + c10 + c11;
79
223M
      float r01 = c00 + c01 - c10 - c11;
80
223M
      float r10 = c00 - c01 + c10 - c11;
81
223M
      float r11 = c00 - c01 - c10 + c11;
82
223M
      r00 *= 0.25f;
83
223M
      r01 *= 0.25f;
84
223M
      r10 *= 0.25f;
85
223M
      r11 *= 0.25f;
86
223M
      temp[y * kBlockDim + x] = r00;
87
223M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
223M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
223M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
223M
    }
91
55.7M
  }
92
125M
  for (size_t y = 0; y < S; y++) {
93
1.00G
    for (size_t x = 0; x < S; x++) {
94
892M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
892M
    }
96
111M
  }
97
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
41.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
83.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
55.7M
      float c00 = block[y * 2 * stride + x * 2];
75
55.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
55.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
55.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
55.7M
      float r00 = c00 + c01 + c10 + c11;
79
55.7M
      float r01 = c00 + c01 - c10 - c11;
80
55.7M
      float r10 = c00 - c01 + c10 - c11;
81
55.7M
      float r11 = c00 - c01 - c10 + c11;
82
55.7M
      r00 *= 0.25f;
83
55.7M
      r01 *= 0.25f;
84
55.7M
      r10 *= 0.25f;
85
55.7M
      r11 *= 0.25f;
86
55.7M
      temp[y * kBlockDim + x] = r00;
87
55.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
55.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
55.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
55.7M
    }
91
27.8M
  }
92
69.7M
  for (size_t y = 0; y < S; y++) {
93
278M
    for (size_t x = 0; x < S; x++) {
94
223M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
223M
    }
96
55.7M
  }
97
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
13.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
13.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
13.9M
  static_assert(S % 2 == 0, "S should be even");
70
13.9M
  float temp[kDCTBlockSize];
71
13.9M
  constexpr size_t num_2x2 = S / 2;
72
27.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
27.8M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.9M
      float c00 = block[y * 2 * stride + x * 2];
75
13.9M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
13.9M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
13.9M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
13.9M
      float r00 = c00 + c01 + c10 + c11;
79
13.9M
      float r01 = c00 + c01 - c10 - c11;
80
13.9M
      float r10 = c00 - c01 + c10 - c11;
81
13.9M
      float r11 = c00 - c01 - c10 + c11;
82
13.9M
      r00 *= 0.25f;
83
13.9M
      r01 *= 0.25f;
84
13.9M
      r10 *= 0.25f;
85
13.9M
      r11 *= 0.25f;
86
13.9M
      temp[y * kBlockDim + x] = r00;
87
13.9M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
13.9M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
13.9M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
13.9M
    }
91
13.9M
  }
92
41.8M
  for (size_t y = 0; y < S; y++) {
93
83.6M
    for (size_t x = 0; x < S; x++) {
94
55.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
55.7M
    }
96
27.8M
  }
97
13.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
56.4M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
56.4M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
56.4M
      {
102
56.4M
          0.2500000000000000,
103
56.4M
          0.8769029297991420f,
104
56.4M
          0.0000000000000000,
105
56.4M
          0.0000000000000000,
106
56.4M
          0.0000000000000000,
107
56.4M
          -0.4105377591765233f,
108
56.4M
          0.0000000000000000,
109
56.4M
          0.0000000000000000,
110
56.4M
          0.0000000000000000,
111
56.4M
          0.0000000000000000,
112
56.4M
          0.0000000000000000,
113
56.4M
          0.0000000000000000,
114
56.4M
          0.0000000000000000,
115
56.4M
          0.0000000000000000,
116
56.4M
          0.0000000000000000,
117
56.4M
          0.0000000000000000,
118
56.4M
      },
119
56.4M
      {
120
56.4M
          0.2500000000000000,
121
56.4M
          0.2206518106944235f,
122
56.4M
          0.0000000000000000,
123
56.4M
          0.0000000000000000,
124
56.4M
          -0.7071067811865474f,
125
56.4M
          0.6235485373547691f,
126
56.4M
          0.0000000000000000,
127
56.4M
          0.0000000000000000,
128
56.4M
          0.0000000000000000,
129
56.4M
          0.0000000000000000,
130
56.4M
          0.0000000000000000,
131
56.4M
          0.0000000000000000,
132
56.4M
          0.0000000000000000,
133
56.4M
          0.0000000000000000,
134
56.4M
          0.0000000000000000,
135
56.4M
          0.0000000000000000,
136
56.4M
      },
137
56.4M
      {
138
56.4M
          0.2500000000000000,
139
56.4M
          -0.1014005039375376f,
140
56.4M
          0.4067007583026075f,
141
56.4M
          -0.2125574805828875f,
142
56.4M
          0.0000000000000000,
143
56.4M
          -0.0643507165794627f,
144
56.4M
          -0.4517556589999482f,
145
56.4M
          -0.3046847507248690f,
146
56.4M
          0.3017929516615495f,
147
56.4M
          0.4082482904638627f,
148
56.4M
          0.1747866975480809f,
149
56.4M
          -0.2110560104933578f,
150
56.4M
          -0.1426608480880726f,
151
56.4M
          -0.1381354035075859f,
152
56.4M
          -0.1743760259965107f,
153
56.4M
          0.1135498731499434f,
154
56.4M
      },
155
56.4M
      {
156
56.4M
          0.2500000000000000,
157
56.4M
          -0.1014005039375375f,
158
56.4M
          0.4444481661973445f,
159
56.4M
          0.3085497062849767f,
160
56.4M
          0.0000000000000000f,
161
56.4M
          -0.0643507165794627f,
162
56.4M
          0.1585450355184006f,
163
56.4M
          0.5112616136591823f,
164
56.4M
          0.2579236279634118f,
165
56.4M
          0.0000000000000000,
166
56.4M
          0.0812611176717539f,
167
56.4M
          0.1856718091610980f,
168
56.4M
          -0.3416446842253372f,
169
56.4M
          0.3302282550303788f,
170
56.4M
          0.0702790691196284f,
171
56.4M
          -0.0741750459581035f,
172
56.4M
      },
173
56.4M
      {
174
56.4M
          0.2500000000000000,
175
56.4M
          0.2206518106944236f,
176
56.4M
          0.0000000000000000,
177
56.4M
          0.0000000000000000,
178
56.4M
          0.7071067811865476f,
179
56.4M
          0.6235485373547694f,
180
56.4M
          0.0000000000000000,
181
56.4M
          0.0000000000000000,
182
56.4M
          0.0000000000000000,
183
56.4M
          0.0000000000000000,
184
56.4M
          0.0000000000000000,
185
56.4M
          0.0000000000000000,
186
56.4M
          0.0000000000000000,
187
56.4M
          0.0000000000000000,
188
56.4M
          0.0000000000000000,
189
56.4M
          0.0000000000000000,
190
56.4M
      },
191
56.4M
      {
192
56.4M
          0.2500000000000000,
193
56.4M
          -0.1014005039375378f,
194
56.4M
          0.0000000000000000,
195
56.4M
          0.4706702258572536f,
196
56.4M
          0.0000000000000000,
197
56.4M
          -0.0643507165794628f,
198
56.4M
          -0.0403851516082220f,
199
56.4M
          0.0000000000000000,
200
56.4M
          0.1627234014286620f,
201
56.4M
          0.0000000000000000,
202
56.4M
          0.0000000000000000,
203
56.4M
          0.0000000000000000,
204
56.4M
          0.7367497537172237f,
205
56.4M
          0.0875511500058708f,
206
56.4M
          -0.2921026642334881f,
207
56.4M
          0.1940289303259434f,
208
56.4M
      },
209
56.4M
      {
210
56.4M
          0.2500000000000000,
211
56.4M
          -0.1014005039375377f,
212
56.4M
          0.1957439937204294f,
213
56.4M
          -0.1621205195722993f,
214
56.4M
          0.0000000000000000,
215
56.4M
          -0.0643507165794628f,
216
56.4M
          0.0074182263792424f,
217
56.4M
          -0.2904801297289980f,
218
56.4M
          0.0952002265347504f,
219
56.4M
          0.0000000000000000,
220
56.4M
          -0.3675398009862027f,
221
56.4M
          0.4921585901373873f,
222
56.4M
          0.2462710772207515f,
223
56.4M
          -0.0794670660590957f,
224
56.4M
          0.3623817333531167f,
225
56.4M
          -0.4351904965232280f,
226
56.4M
      },
227
56.4M
      {
228
56.4M
          0.2500000000000000,
229
56.4M
          -0.1014005039375376f,
230
56.4M
          0.2929100136981264f,
231
56.4M
          0.0000000000000000,
232
56.4M
          0.0000000000000000,
233
56.4M
          -0.0643507165794627f,
234
56.4M
          0.3935103426921017f,
235
56.4M
          -0.0657870154914280f,
236
56.4M
          0.0000000000000000,
237
56.4M
          -0.4082482904638628f,
238
56.4M
          -0.3078822139579090f,
239
56.4M
          -0.3852501370925192f,
240
56.4M
          -0.0857401903551931f,
241
56.4M
          -0.4613374887461511f,
242
56.4M
          0.0000000000000000,
243
56.4M
          0.2191868483885747f,
244
56.4M
      },
245
56.4M
      {
246
56.4M
          0.2500000000000000,
247
56.4M
          -0.1014005039375376f,
248
56.4M
          -0.4067007583026072f,
249
56.4M
          -0.2125574805828705f,
250
56.4M
          0.0000000000000000,
251
56.4M
          -0.0643507165794627f,
252
56.4M
          -0.4517556589999464f,
253
56.4M
          0.3046847507248840f,
254
56.4M
          0.3017929516615503f,
255
56.4M
          -0.4082482904638635f,
256
56.4M
          -0.1747866975480813f,
257
56.4M
          0.2110560104933581f,
258
56.4M
          -0.1426608480880734f,
259
56.4M
          -0.1381354035075829f,
260
56.4M
          -0.1743760259965108f,
261
56.4M
          0.1135498731499426f,
262
56.4M
      },
263
56.4M
      {
264
56.4M
          0.2500000000000000,
265
56.4M
          -0.1014005039375377f,
266
56.4M
          -0.1957439937204287f,
267
56.4M
          -0.1621205195722833f,
268
56.4M
          0.0000000000000000,
269
56.4M
          -0.0643507165794628f,
270
56.4M
          0.0074182263792444f,
271
56.4M
          0.2904801297290076f,
272
56.4M
          0.0952002265347505f,
273
56.4M
          0.0000000000000000,
274
56.4M
          0.3675398009862011f,
275
56.4M
          -0.4921585901373891f,
276
56.4M
          0.2462710772207514f,
277
56.4M
          -0.0794670660591026f,
278
56.4M
          0.3623817333531165f,
279
56.4M
          -0.4351904965232251f,
280
56.4M
      },
281
56.4M
      {
282
56.4M
          0.2500000000000000,
283
56.4M
          -0.1014005039375375f,
284
56.4M
          0.0000000000000000,
285
56.4M
          -0.4706702258572528f,
286
56.4M
          0.0000000000000000,
287
56.4M
          -0.0643507165794627f,
288
56.4M
          0.1107416575309343f,
289
56.4M
          0.0000000000000000,
290
56.4M
          -0.1627234014286617f,
291
56.4M
          0.0000000000000000,
292
56.4M
          0.0000000000000000,
293
56.4M
          0.0000000000000000,
294
56.4M
          0.1488339922711357f,
295
56.4M
          0.4972464710953509f,
296
56.4M
          0.2921026642334879f,
297
56.4M
          0.5550443808910661f,
298
56.4M
      },
299
56.4M
      {
300
56.4M
          0.2500000000000000,
301
56.4M
          -0.1014005039375377f,
302
56.4M
          0.1137907446044809f,
303
56.4M
          -0.1464291867126764f,
304
56.4M
          0.0000000000000000,
305
56.4M
          -0.0643507165794628f,
306
56.4M
          0.0829816309488205f,
307
56.4M
          -0.2388977352334460f,
308
56.4M
          -0.3531238544981630f,
309
56.4M
          -0.4082482904638630f,
310
56.4M
          0.4826689115059883f,
311
56.4M
          0.1741941265991622f,
312
56.4M
          -0.0476868035022925f,
313
56.4M
          0.1253805944856366f,
314
56.4M
          -0.4326608024727445f,
315
56.4M
          -0.2546827712406646f,
316
56.4M
      },
317
56.4M
      {
318
56.4M
          0.2500000000000000,
319
56.4M
          -0.1014005039375377f,
320
56.4M
          -0.4444481661973438f,
321
56.4M
          0.3085497062849487f,
322
56.4M
          0.0000000000000000,
323
56.4M
          -0.0643507165794628f,
324
56.4M
          0.1585450355183970f,
325
56.4M
          -0.5112616136592012f,
326
56.4M
          0.2579236279634129f,
327
56.4M
          0.0000000000000000,
328
56.4M
          -0.0812611176717504f,
329
56.4M
          -0.1856718091610990f,
330
56.4M
          -0.3416446842253373f,
331
56.4M
          0.3302282550303805f,
332
56.4M
          0.0702790691196282f,
333
56.4M
          -0.0741750459581023f,
334
56.4M
      },
335
56.4M
      {
336
56.4M
          0.2500000000000000,
337
56.4M
          -0.1014005039375376f,
338
56.4M
          -0.2929100136981264f,
339
56.4M
          0.0000000000000000,
340
56.4M
          0.0000000000000000,
341
56.4M
          -0.0643507165794627f,
342
56.4M
          0.3935103426921022f,
343
56.4M
          0.0657870154914254f,
344
56.4M
          0.0000000000000000,
345
56.4M
          0.4082482904638634f,
346
56.4M
          0.3078822139579031f,
347
56.4M
          0.3852501370925211f,
348
56.4M
          -0.0857401903551927f,
349
56.4M
          -0.4613374887461554f,
350
56.4M
          0.0000000000000000,
351
56.4M
          0.2191868483885728f,
352
56.4M
      },
353
56.4M
      {
354
56.4M
          0.2500000000000000,
355
56.4M
          -0.1014005039375376f,
356
56.4M
          -0.1137907446044814f,
357
56.4M
          -0.1464291867126654f,
358
56.4M
          0.0000000000000000,
359
56.4M
          -0.0643507165794627f,
360
56.4M
          0.0829816309488214f,
361
56.4M
          0.2388977352334547f,
362
56.4M
          -0.3531238544981624f,
363
56.4M
          0.4082482904638630f,
364
56.4M
          -0.4826689115059858f,
365
56.4M
          -0.1741941265991621f,
366
56.4M
          -0.0476868035022928f,
367
56.4M
          0.1253805944856431f,
368
56.4M
          -0.4326608024727457f,
369
56.4M
          -0.2546827712406641f,
370
56.4M
      },
371
56.4M
      {
372
56.4M
          0.2500000000000000,
373
56.4M
          -0.1014005039375374f,
374
56.4M
          0.0000000000000000,
375
56.4M
          0.4251149611657548f,
376
56.4M
          0.0000000000000000,
377
56.4M
          -0.0643507165794626f,
378
56.4M
          -0.4517556589999480f,
379
56.4M
          0.0000000000000000,
380
56.4M
          -0.6035859033230976f,
381
56.4M
          0.0000000000000000,
382
56.4M
          0.0000000000000000,
383
56.4M
          0.0000000000000000,
384
56.4M
          -0.1426608480880724f,
385
56.4M
          -0.1381354035075845f,
386
56.4M
          0.3487520519930227f,
387
56.4M
          0.1135498731499429f,
388
56.4M
      },
389
56.4M
  };
390
391
56.4M
  const HWY_CAPPED(float, 16) d;
392
169M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
112M
    auto scalar = Zero(d);
394
1.91G
    for (size_t j = 0; j < 16; j++) {
395
1.80G
      auto px = Set(d, pixels[j]);
396
1.80G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.80G
      scalar = MulAdd(px, basis, scalar);
398
1.80G
    }
399
112M
    Store(scalar, d, coeffs + i);
400
112M
  }
401
56.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
343k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
343k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
343k
      {
102
343k
          0.2500000000000000,
103
343k
          0.8769029297991420f,
104
343k
          0.0000000000000000,
105
343k
          0.0000000000000000,
106
343k
          0.0000000000000000,
107
343k
          -0.4105377591765233f,
108
343k
          0.0000000000000000,
109
343k
          0.0000000000000000,
110
343k
          0.0000000000000000,
111
343k
          0.0000000000000000,
112
343k
          0.0000000000000000,
113
343k
          0.0000000000000000,
114
343k
          0.0000000000000000,
115
343k
          0.0000000000000000,
116
343k
          0.0000000000000000,
117
343k
          0.0000000000000000,
118
343k
      },
119
343k
      {
120
343k
          0.2500000000000000,
121
343k
          0.2206518106944235f,
122
343k
          0.0000000000000000,
123
343k
          0.0000000000000000,
124
343k
          -0.7071067811865474f,
125
343k
          0.6235485373547691f,
126
343k
          0.0000000000000000,
127
343k
          0.0000000000000000,
128
343k
          0.0000000000000000,
129
343k
          0.0000000000000000,
130
343k
          0.0000000000000000,
131
343k
          0.0000000000000000,
132
343k
          0.0000000000000000,
133
343k
          0.0000000000000000,
134
343k
          0.0000000000000000,
135
343k
          0.0000000000000000,
136
343k
      },
137
343k
      {
138
343k
          0.2500000000000000,
139
343k
          -0.1014005039375376f,
140
343k
          0.4067007583026075f,
141
343k
          -0.2125574805828875f,
142
343k
          0.0000000000000000,
143
343k
          -0.0643507165794627f,
144
343k
          -0.4517556589999482f,
145
343k
          -0.3046847507248690f,
146
343k
          0.3017929516615495f,
147
343k
          0.4082482904638627f,
148
343k
          0.1747866975480809f,
149
343k
          -0.2110560104933578f,
150
343k
          -0.1426608480880726f,
151
343k
          -0.1381354035075859f,
152
343k
          -0.1743760259965107f,
153
343k
          0.1135498731499434f,
154
343k
      },
155
343k
      {
156
343k
          0.2500000000000000,
157
343k
          -0.1014005039375375f,
158
343k
          0.4444481661973445f,
159
343k
          0.3085497062849767f,
160
343k
          0.0000000000000000f,
161
343k
          -0.0643507165794627f,
162
343k
          0.1585450355184006f,
163
343k
          0.5112616136591823f,
164
343k
          0.2579236279634118f,
165
343k
          0.0000000000000000,
166
343k
          0.0812611176717539f,
167
343k
          0.1856718091610980f,
168
343k
          -0.3416446842253372f,
169
343k
          0.3302282550303788f,
170
343k
          0.0702790691196284f,
171
343k
          -0.0741750459581035f,
172
343k
      },
173
343k
      {
174
343k
          0.2500000000000000,
175
343k
          0.2206518106944236f,
176
343k
          0.0000000000000000,
177
343k
          0.0000000000000000,
178
343k
          0.7071067811865476f,
179
343k
          0.6235485373547694f,
180
343k
          0.0000000000000000,
181
343k
          0.0000000000000000,
182
343k
          0.0000000000000000,
183
343k
          0.0000000000000000,
184
343k
          0.0000000000000000,
185
343k
          0.0000000000000000,
186
343k
          0.0000000000000000,
187
343k
          0.0000000000000000,
188
343k
          0.0000000000000000,
189
343k
          0.0000000000000000,
190
343k
      },
191
343k
      {
192
343k
          0.2500000000000000,
193
343k
          -0.1014005039375378f,
194
343k
          0.0000000000000000,
195
343k
          0.4706702258572536f,
196
343k
          0.0000000000000000,
197
343k
          -0.0643507165794628f,
198
343k
          -0.0403851516082220f,
199
343k
          0.0000000000000000,
200
343k
          0.1627234014286620f,
201
343k
          0.0000000000000000,
202
343k
          0.0000000000000000,
203
343k
          0.0000000000000000,
204
343k
          0.7367497537172237f,
205
343k
          0.0875511500058708f,
206
343k
          -0.2921026642334881f,
207
343k
          0.1940289303259434f,
208
343k
      },
209
343k
      {
210
343k
          0.2500000000000000,
211
343k
          -0.1014005039375377f,
212
343k
          0.1957439937204294f,
213
343k
          -0.1621205195722993f,
214
343k
          0.0000000000000000,
215
343k
          -0.0643507165794628f,
216
343k
          0.0074182263792424f,
217
343k
          -0.2904801297289980f,
218
343k
          0.0952002265347504f,
219
343k
          0.0000000000000000,
220
343k
          -0.3675398009862027f,
221
343k
          0.4921585901373873f,
222
343k
          0.2462710772207515f,
223
343k
          -0.0794670660590957f,
224
343k
          0.3623817333531167f,
225
343k
          -0.4351904965232280f,
226
343k
      },
227
343k
      {
228
343k
          0.2500000000000000,
229
343k
          -0.1014005039375376f,
230
343k
          0.2929100136981264f,
231
343k
          0.0000000000000000,
232
343k
          0.0000000000000000,
233
343k
          -0.0643507165794627f,
234
343k
          0.3935103426921017f,
235
343k
          -0.0657870154914280f,
236
343k
          0.0000000000000000,
237
343k
          -0.4082482904638628f,
238
343k
          -0.3078822139579090f,
239
343k
          -0.3852501370925192f,
240
343k
          -0.0857401903551931f,
241
343k
          -0.4613374887461511f,
242
343k
          0.0000000000000000,
243
343k
          0.2191868483885747f,
244
343k
      },
245
343k
      {
246
343k
          0.2500000000000000,
247
343k
          -0.1014005039375376f,
248
343k
          -0.4067007583026072f,
249
343k
          -0.2125574805828705f,
250
343k
          0.0000000000000000,
251
343k
          -0.0643507165794627f,
252
343k
          -0.4517556589999464f,
253
343k
          0.3046847507248840f,
254
343k
          0.3017929516615503f,
255
343k
          -0.4082482904638635f,
256
343k
          -0.1747866975480813f,
257
343k
          0.2110560104933581f,
258
343k
          -0.1426608480880734f,
259
343k
          -0.1381354035075829f,
260
343k
          -0.1743760259965108f,
261
343k
          0.1135498731499426f,
262
343k
      },
263
343k
      {
264
343k
          0.2500000000000000,
265
343k
          -0.1014005039375377f,
266
343k
          -0.1957439937204287f,
267
343k
          -0.1621205195722833f,
268
343k
          0.0000000000000000,
269
343k
          -0.0643507165794628f,
270
343k
          0.0074182263792444f,
271
343k
          0.2904801297290076f,
272
343k
          0.0952002265347505f,
273
343k
          0.0000000000000000,
274
343k
          0.3675398009862011f,
275
343k
          -0.4921585901373891f,
276
343k
          0.2462710772207514f,
277
343k
          -0.0794670660591026f,
278
343k
          0.3623817333531165f,
279
343k
          -0.4351904965232251f,
280
343k
      },
281
343k
      {
282
343k
          0.2500000000000000,
283
343k
          -0.1014005039375375f,
284
343k
          0.0000000000000000,
285
343k
          -0.4706702258572528f,
286
343k
          0.0000000000000000,
287
343k
          -0.0643507165794627f,
288
343k
          0.1107416575309343f,
289
343k
          0.0000000000000000,
290
343k
          -0.1627234014286617f,
291
343k
          0.0000000000000000,
292
343k
          0.0000000000000000,
293
343k
          0.0000000000000000,
294
343k
          0.1488339922711357f,
295
343k
          0.4972464710953509f,
296
343k
          0.2921026642334879f,
297
343k
          0.5550443808910661f,
298
343k
      },
299
343k
      {
300
343k
          0.2500000000000000,
301
343k
          -0.1014005039375377f,
302
343k
          0.1137907446044809f,
303
343k
          -0.1464291867126764f,
304
343k
          0.0000000000000000,
305
343k
          -0.0643507165794628f,
306
343k
          0.0829816309488205f,
307
343k
          -0.2388977352334460f,
308
343k
          -0.3531238544981630f,
309
343k
          -0.4082482904638630f,
310
343k
          0.4826689115059883f,
311
343k
          0.1741941265991622f,
312
343k
          -0.0476868035022925f,
313
343k
          0.1253805944856366f,
314
343k
          -0.4326608024727445f,
315
343k
          -0.2546827712406646f,
316
343k
      },
317
343k
      {
318
343k
          0.2500000000000000,
319
343k
          -0.1014005039375377f,
320
343k
          -0.4444481661973438f,
321
343k
          0.3085497062849487f,
322
343k
          0.0000000000000000,
323
343k
          -0.0643507165794628f,
324
343k
          0.1585450355183970f,
325
343k
          -0.5112616136592012f,
326
343k
          0.2579236279634129f,
327
343k
          0.0000000000000000,
328
343k
          -0.0812611176717504f,
329
343k
          -0.1856718091610990f,
330
343k
          -0.3416446842253373f,
331
343k
          0.3302282550303805f,
332
343k
          0.0702790691196282f,
333
343k
          -0.0741750459581023f,
334
343k
      },
335
343k
      {
336
343k
          0.2500000000000000,
337
343k
          -0.1014005039375376f,
338
343k
          -0.2929100136981264f,
339
343k
          0.0000000000000000,
340
343k
          0.0000000000000000,
341
343k
          -0.0643507165794627f,
342
343k
          0.3935103426921022f,
343
343k
          0.0657870154914254f,
344
343k
          0.0000000000000000,
345
343k
          0.4082482904638634f,
346
343k
          0.3078822139579031f,
347
343k
          0.3852501370925211f,
348
343k
          -0.0857401903551927f,
349
343k
          -0.4613374887461554f,
350
343k
          0.0000000000000000,
351
343k
          0.2191868483885728f,
352
343k
      },
353
343k
      {
354
343k
          0.2500000000000000,
355
343k
          -0.1014005039375376f,
356
343k
          -0.1137907446044814f,
357
343k
          -0.1464291867126654f,
358
343k
          0.0000000000000000,
359
343k
          -0.0643507165794627f,
360
343k
          0.0829816309488214f,
361
343k
          0.2388977352334547f,
362
343k
          -0.3531238544981624f,
363
343k
          0.4082482904638630f,
364
343k
          -0.4826689115059858f,
365
343k
          -0.1741941265991621f,
366
343k
          -0.0476868035022928f,
367
343k
          0.1253805944856431f,
368
343k
          -0.4326608024727457f,
369
343k
          -0.2546827712406641f,
370
343k
      },
371
343k
      {
372
343k
          0.2500000000000000,
373
343k
          -0.1014005039375374f,
374
343k
          0.0000000000000000,
375
343k
          0.4251149611657548f,
376
343k
          0.0000000000000000,
377
343k
          -0.0643507165794626f,
378
343k
          -0.4517556589999480f,
379
343k
          0.0000000000000000,
380
343k
          -0.6035859033230976f,
381
343k
          0.0000000000000000,
382
343k
          0.0000000000000000,
383
343k
          0.0000000000000000,
384
343k
          -0.1426608480880724f,
385
343k
          -0.1381354035075845f,
386
343k
          0.3487520519930227f,
387
343k
          0.1135498731499429f,
388
343k
      },
389
343k
  };
390
391
343k
  const HWY_CAPPED(float, 16) d;
392
1.02M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
686k
    auto scalar = Zero(d);
394
11.6M
    for (size_t j = 0; j < 16; j++) {
395
10.9M
      auto px = Set(d, pixels[j]);
396
10.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
10.9M
      scalar = MulAdd(px, basis, scalar);
398
10.9M
    }
399
686k
    Store(scalar, d, coeffs + i);
400
686k
  }
401
343k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
343k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
343k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
343k
      {
102
343k
          0.2500000000000000,
103
343k
          0.8769029297991420f,
104
343k
          0.0000000000000000,
105
343k
          0.0000000000000000,
106
343k
          0.0000000000000000,
107
343k
          -0.4105377591765233f,
108
343k
          0.0000000000000000,
109
343k
          0.0000000000000000,
110
343k
          0.0000000000000000,
111
343k
          0.0000000000000000,
112
343k
          0.0000000000000000,
113
343k
          0.0000000000000000,
114
343k
          0.0000000000000000,
115
343k
          0.0000000000000000,
116
343k
          0.0000000000000000,
117
343k
          0.0000000000000000,
118
343k
      },
119
343k
      {
120
343k
          0.2500000000000000,
121
343k
          0.2206518106944235f,
122
343k
          0.0000000000000000,
123
343k
          0.0000000000000000,
124
343k
          -0.7071067811865474f,
125
343k
          0.6235485373547691f,
126
343k
          0.0000000000000000,
127
343k
          0.0000000000000000,
128
343k
          0.0000000000000000,
129
343k
          0.0000000000000000,
130
343k
          0.0000000000000000,
131
343k
          0.0000000000000000,
132
343k
          0.0000000000000000,
133
343k
          0.0000000000000000,
134
343k
          0.0000000000000000,
135
343k
          0.0000000000000000,
136
343k
      },
137
343k
      {
138
343k
          0.2500000000000000,
139
343k
          -0.1014005039375376f,
140
343k
          0.4067007583026075f,
141
343k
          -0.2125574805828875f,
142
343k
          0.0000000000000000,
143
343k
          -0.0643507165794627f,
144
343k
          -0.4517556589999482f,
145
343k
          -0.3046847507248690f,
146
343k
          0.3017929516615495f,
147
343k
          0.4082482904638627f,
148
343k
          0.1747866975480809f,
149
343k
          -0.2110560104933578f,
150
343k
          -0.1426608480880726f,
151
343k
          -0.1381354035075859f,
152
343k
          -0.1743760259965107f,
153
343k
          0.1135498731499434f,
154
343k
      },
155
343k
      {
156
343k
          0.2500000000000000,
157
343k
          -0.1014005039375375f,
158
343k
          0.4444481661973445f,
159
343k
          0.3085497062849767f,
160
343k
          0.0000000000000000f,
161
343k
          -0.0643507165794627f,
162
343k
          0.1585450355184006f,
163
343k
          0.5112616136591823f,
164
343k
          0.2579236279634118f,
165
343k
          0.0000000000000000,
166
343k
          0.0812611176717539f,
167
343k
          0.1856718091610980f,
168
343k
          -0.3416446842253372f,
169
343k
          0.3302282550303788f,
170
343k
          0.0702790691196284f,
171
343k
          -0.0741750459581035f,
172
343k
      },
173
343k
      {
174
343k
          0.2500000000000000,
175
343k
          0.2206518106944236f,
176
343k
          0.0000000000000000,
177
343k
          0.0000000000000000,
178
343k
          0.7071067811865476f,
179
343k
          0.6235485373547694f,
180
343k
          0.0000000000000000,
181
343k
          0.0000000000000000,
182
343k
          0.0000000000000000,
183
343k
          0.0000000000000000,
184
343k
          0.0000000000000000,
185
343k
          0.0000000000000000,
186
343k
          0.0000000000000000,
187
343k
          0.0000000000000000,
188
343k
          0.0000000000000000,
189
343k
          0.0000000000000000,
190
343k
      },
191
343k
      {
192
343k
          0.2500000000000000,
193
343k
          -0.1014005039375378f,
194
343k
          0.0000000000000000,
195
343k
          0.4706702258572536f,
196
343k
          0.0000000000000000,
197
343k
          -0.0643507165794628f,
198
343k
          -0.0403851516082220f,
199
343k
          0.0000000000000000,
200
343k
          0.1627234014286620f,
201
343k
          0.0000000000000000,
202
343k
          0.0000000000000000,
203
343k
          0.0000000000000000,
204
343k
          0.7367497537172237f,
205
343k
          0.0875511500058708f,
206
343k
          -0.2921026642334881f,
207
343k
          0.1940289303259434f,
208
343k
      },
209
343k
      {
210
343k
          0.2500000000000000,
211
343k
          -0.1014005039375377f,
212
343k
          0.1957439937204294f,
213
343k
          -0.1621205195722993f,
214
343k
          0.0000000000000000,
215
343k
          -0.0643507165794628f,
216
343k
          0.0074182263792424f,
217
343k
          -0.2904801297289980f,
218
343k
          0.0952002265347504f,
219
343k
          0.0000000000000000,
220
343k
          -0.3675398009862027f,
221
343k
          0.4921585901373873f,
222
343k
          0.2462710772207515f,
223
343k
          -0.0794670660590957f,
224
343k
          0.3623817333531167f,
225
343k
          -0.4351904965232280f,
226
343k
      },
227
343k
      {
228
343k
          0.2500000000000000,
229
343k
          -0.1014005039375376f,
230
343k
          0.2929100136981264f,
231
343k
          0.0000000000000000,
232
343k
          0.0000000000000000,
233
343k
          -0.0643507165794627f,
234
343k
          0.3935103426921017f,
235
343k
          -0.0657870154914280f,
236
343k
          0.0000000000000000,
237
343k
          -0.4082482904638628f,
238
343k
          -0.3078822139579090f,
239
343k
          -0.3852501370925192f,
240
343k
          -0.0857401903551931f,
241
343k
          -0.4613374887461511f,
242
343k
          0.0000000000000000,
243
343k
          0.2191868483885747f,
244
343k
      },
245
343k
      {
246
343k
          0.2500000000000000,
247
343k
          -0.1014005039375376f,
248
343k
          -0.4067007583026072f,
249
343k
          -0.2125574805828705f,
250
343k
          0.0000000000000000,
251
343k
          -0.0643507165794627f,
252
343k
          -0.4517556589999464f,
253
343k
          0.3046847507248840f,
254
343k
          0.3017929516615503f,
255
343k
          -0.4082482904638635f,
256
343k
          -0.1747866975480813f,
257
343k
          0.2110560104933581f,
258
343k
          -0.1426608480880734f,
259
343k
          -0.1381354035075829f,
260
343k
          -0.1743760259965108f,
261
343k
          0.1135498731499426f,
262
343k
      },
263
343k
      {
264
343k
          0.2500000000000000,
265
343k
          -0.1014005039375377f,
266
343k
          -0.1957439937204287f,
267
343k
          -0.1621205195722833f,
268
343k
          0.0000000000000000,
269
343k
          -0.0643507165794628f,
270
343k
          0.0074182263792444f,
271
343k
          0.2904801297290076f,
272
343k
          0.0952002265347505f,
273
343k
          0.0000000000000000,
274
343k
          0.3675398009862011f,
275
343k
          -0.4921585901373891f,
276
343k
          0.2462710772207514f,
277
343k
          -0.0794670660591026f,
278
343k
          0.3623817333531165f,
279
343k
          -0.4351904965232251f,
280
343k
      },
281
343k
      {
282
343k
          0.2500000000000000,
283
343k
          -0.1014005039375375f,
284
343k
          0.0000000000000000,
285
343k
          -0.4706702258572528f,
286
343k
          0.0000000000000000,
287
343k
          -0.0643507165794627f,
288
343k
          0.1107416575309343f,
289
343k
          0.0000000000000000,
290
343k
          -0.1627234014286617f,
291
343k
          0.0000000000000000,
292
343k
          0.0000000000000000,
293
343k
          0.0000000000000000,
294
343k
          0.1488339922711357f,
295
343k
          0.4972464710953509f,
296
343k
          0.2921026642334879f,
297
343k
          0.5550443808910661f,
298
343k
      },
299
343k
      {
300
343k
          0.2500000000000000,
301
343k
          -0.1014005039375377f,
302
343k
          0.1137907446044809f,
303
343k
          -0.1464291867126764f,
304
343k
          0.0000000000000000,
305
343k
          -0.0643507165794628f,
306
343k
          0.0829816309488205f,
307
343k
          -0.2388977352334460f,
308
343k
          -0.3531238544981630f,
309
343k
          -0.4082482904638630f,
310
343k
          0.4826689115059883f,
311
343k
          0.1741941265991622f,
312
343k
          -0.0476868035022925f,
313
343k
          0.1253805944856366f,
314
343k
          -0.4326608024727445f,
315
343k
          -0.2546827712406646f,
316
343k
      },
317
343k
      {
318
343k
          0.2500000000000000,
319
343k
          -0.1014005039375377f,
320
343k
          -0.4444481661973438f,
321
343k
          0.3085497062849487f,
322
343k
          0.0000000000000000,
323
343k
          -0.0643507165794628f,
324
343k
          0.1585450355183970f,
325
343k
          -0.5112616136592012f,
326
343k
          0.2579236279634129f,
327
343k
          0.0000000000000000,
328
343k
          -0.0812611176717504f,
329
343k
          -0.1856718091610990f,
330
343k
          -0.3416446842253373f,
331
343k
          0.3302282550303805f,
332
343k
          0.0702790691196282f,
333
343k
          -0.0741750459581023f,
334
343k
      },
335
343k
      {
336
343k
          0.2500000000000000,
337
343k
          -0.1014005039375376f,
338
343k
          -0.2929100136981264f,
339
343k
          0.0000000000000000,
340
343k
          0.0000000000000000,
341
343k
          -0.0643507165794627f,
342
343k
          0.3935103426921022f,
343
343k
          0.0657870154914254f,
344
343k
          0.0000000000000000,
345
343k
          0.4082482904638634f,
346
343k
          0.3078822139579031f,
347
343k
          0.3852501370925211f,
348
343k
          -0.0857401903551927f,
349
343k
          -0.4613374887461554f,
350
343k
          0.0000000000000000,
351
343k
          0.2191868483885728f,
352
343k
      },
353
343k
      {
354
343k
          0.2500000000000000,
355
343k
          -0.1014005039375376f,
356
343k
          -0.1137907446044814f,
357
343k
          -0.1464291867126654f,
358
343k
          0.0000000000000000,
359
343k
          -0.0643507165794627f,
360
343k
          0.0829816309488214f,
361
343k
          0.2388977352334547f,
362
343k
          -0.3531238544981624f,
363
343k
          0.4082482904638630f,
364
343k
          -0.4826689115059858f,
365
343k
          -0.1741941265991621f,
366
343k
          -0.0476868035022928f,
367
343k
          0.1253805944856431f,
368
343k
          -0.4326608024727457f,
369
343k
          -0.2546827712406641f,
370
343k
      },
371
343k
      {
372
343k
          0.2500000000000000,
373
343k
          -0.1014005039375374f,
374
343k
          0.0000000000000000,
375
343k
          0.4251149611657548f,
376
343k
          0.0000000000000000,
377
343k
          -0.0643507165794626f,
378
343k
          -0.4517556589999480f,
379
343k
          0.0000000000000000,
380
343k
          -0.6035859033230976f,
381
343k
          0.0000000000000000,
382
343k
          0.0000000000000000,
383
343k
          0.0000000000000000,
384
343k
          -0.1426608480880724f,
385
343k
          -0.1381354035075845f,
386
343k
          0.3487520519930227f,
387
343k
          0.1135498731499429f,
388
343k
      },
389
343k
  };
390
391
343k
  const HWY_CAPPED(float, 16) d;
392
1.02M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
686k
    auto scalar = Zero(d);
394
11.6M
    for (size_t j = 0; j < 16; j++) {
395
10.9M
      auto px = Set(d, pixels[j]);
396
10.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
10.9M
      scalar = MulAdd(px, basis, scalar);
398
10.9M
    }
399
686k
    Store(scalar, d, coeffs + i);
400
686k
  }
401
343k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
55.7M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
55.7M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
55.7M
      {
102
55.7M
          0.2500000000000000,
103
55.7M
          0.8769029297991420f,
104
55.7M
          0.0000000000000000,
105
55.7M
          0.0000000000000000,
106
55.7M
          0.0000000000000000,
107
55.7M
          -0.4105377591765233f,
108
55.7M
          0.0000000000000000,
109
55.7M
          0.0000000000000000,
110
55.7M
          0.0000000000000000,
111
55.7M
          0.0000000000000000,
112
55.7M
          0.0000000000000000,
113
55.7M
          0.0000000000000000,
114
55.7M
          0.0000000000000000,
115
55.7M
          0.0000000000000000,
116
55.7M
          0.0000000000000000,
117
55.7M
          0.0000000000000000,
118
55.7M
      },
119
55.7M
      {
120
55.7M
          0.2500000000000000,
121
55.7M
          0.2206518106944235f,
122
55.7M
          0.0000000000000000,
123
55.7M
          0.0000000000000000,
124
55.7M
          -0.7071067811865474f,
125
55.7M
          0.6235485373547691f,
126
55.7M
          0.0000000000000000,
127
55.7M
          0.0000000000000000,
128
55.7M
          0.0000000000000000,
129
55.7M
          0.0000000000000000,
130
55.7M
          0.0000000000000000,
131
55.7M
          0.0000000000000000,
132
55.7M
          0.0000000000000000,
133
55.7M
          0.0000000000000000,
134
55.7M
          0.0000000000000000,
135
55.7M
          0.0000000000000000,
136
55.7M
      },
137
55.7M
      {
138
55.7M
          0.2500000000000000,
139
55.7M
          -0.1014005039375376f,
140
55.7M
          0.4067007583026075f,
141
55.7M
          -0.2125574805828875f,
142
55.7M
          0.0000000000000000,
143
55.7M
          -0.0643507165794627f,
144
55.7M
          -0.4517556589999482f,
145
55.7M
          -0.3046847507248690f,
146
55.7M
          0.3017929516615495f,
147
55.7M
          0.4082482904638627f,
148
55.7M
          0.1747866975480809f,
149
55.7M
          -0.2110560104933578f,
150
55.7M
          -0.1426608480880726f,
151
55.7M
          -0.1381354035075859f,
152
55.7M
          -0.1743760259965107f,
153
55.7M
          0.1135498731499434f,
154
55.7M
      },
155
55.7M
      {
156
55.7M
          0.2500000000000000,
157
55.7M
          -0.1014005039375375f,
158
55.7M
          0.4444481661973445f,
159
55.7M
          0.3085497062849767f,
160
55.7M
          0.0000000000000000f,
161
55.7M
          -0.0643507165794627f,
162
55.7M
          0.1585450355184006f,
163
55.7M
          0.5112616136591823f,
164
55.7M
          0.2579236279634118f,
165
55.7M
          0.0000000000000000,
166
55.7M
          0.0812611176717539f,
167
55.7M
          0.1856718091610980f,
168
55.7M
          -0.3416446842253372f,
169
55.7M
          0.3302282550303788f,
170
55.7M
          0.0702790691196284f,
171
55.7M
          -0.0741750459581035f,
172
55.7M
      },
173
55.7M
      {
174
55.7M
          0.2500000000000000,
175
55.7M
          0.2206518106944236f,
176
55.7M
          0.0000000000000000,
177
55.7M
          0.0000000000000000,
178
55.7M
          0.7071067811865476f,
179
55.7M
          0.6235485373547694f,
180
55.7M
          0.0000000000000000,
181
55.7M
          0.0000000000000000,
182
55.7M
          0.0000000000000000,
183
55.7M
          0.0000000000000000,
184
55.7M
          0.0000000000000000,
185
55.7M
          0.0000000000000000,
186
55.7M
          0.0000000000000000,
187
55.7M
          0.0000000000000000,
188
55.7M
          0.0000000000000000,
189
55.7M
          0.0000000000000000,
190
55.7M
      },
191
55.7M
      {
192
55.7M
          0.2500000000000000,
193
55.7M
          -0.1014005039375378f,
194
55.7M
          0.0000000000000000,
195
55.7M
          0.4706702258572536f,
196
55.7M
          0.0000000000000000,
197
55.7M
          -0.0643507165794628f,
198
55.7M
          -0.0403851516082220f,
199
55.7M
          0.0000000000000000,
200
55.7M
          0.1627234014286620f,
201
55.7M
          0.0000000000000000,
202
55.7M
          0.0000000000000000,
203
55.7M
          0.0000000000000000,
204
55.7M
          0.7367497537172237f,
205
55.7M
          0.0875511500058708f,
206
55.7M
          -0.2921026642334881f,
207
55.7M
          0.1940289303259434f,
208
55.7M
      },
209
55.7M
      {
210
55.7M
          0.2500000000000000,
211
55.7M
          -0.1014005039375377f,
212
55.7M
          0.1957439937204294f,
213
55.7M
          -0.1621205195722993f,
214
55.7M
          0.0000000000000000,
215
55.7M
          -0.0643507165794628f,
216
55.7M
          0.0074182263792424f,
217
55.7M
          -0.2904801297289980f,
218
55.7M
          0.0952002265347504f,
219
55.7M
          0.0000000000000000,
220
55.7M
          -0.3675398009862027f,
221
55.7M
          0.4921585901373873f,
222
55.7M
          0.2462710772207515f,
223
55.7M
          -0.0794670660590957f,
224
55.7M
          0.3623817333531167f,
225
55.7M
          -0.4351904965232280f,
226
55.7M
      },
227
55.7M
      {
228
55.7M
          0.2500000000000000,
229
55.7M
          -0.1014005039375376f,
230
55.7M
          0.2929100136981264f,
231
55.7M
          0.0000000000000000,
232
55.7M
          0.0000000000000000,
233
55.7M
          -0.0643507165794627f,
234
55.7M
          0.3935103426921017f,
235
55.7M
          -0.0657870154914280f,
236
55.7M
          0.0000000000000000,
237
55.7M
          -0.4082482904638628f,
238
55.7M
          -0.3078822139579090f,
239
55.7M
          -0.3852501370925192f,
240
55.7M
          -0.0857401903551931f,
241
55.7M
          -0.4613374887461511f,
242
55.7M
          0.0000000000000000,
243
55.7M
          0.2191868483885747f,
244
55.7M
      },
245
55.7M
      {
246
55.7M
          0.2500000000000000,
247
55.7M
          -0.1014005039375376f,
248
55.7M
          -0.4067007583026072f,
249
55.7M
          -0.2125574805828705f,
250
55.7M
          0.0000000000000000,
251
55.7M
          -0.0643507165794627f,
252
55.7M
          -0.4517556589999464f,
253
55.7M
          0.3046847507248840f,
254
55.7M
          0.3017929516615503f,
255
55.7M
          -0.4082482904638635f,
256
55.7M
          -0.1747866975480813f,
257
55.7M
          0.2110560104933581f,
258
55.7M
          -0.1426608480880734f,
259
55.7M
          -0.1381354035075829f,
260
55.7M
          -0.1743760259965108f,
261
55.7M
          0.1135498731499426f,
262
55.7M
      },
263
55.7M
      {
264
55.7M
          0.2500000000000000,
265
55.7M
          -0.1014005039375377f,
266
55.7M
          -0.1957439937204287f,
267
55.7M
          -0.1621205195722833f,
268
55.7M
          0.0000000000000000,
269
55.7M
          -0.0643507165794628f,
270
55.7M
          0.0074182263792444f,
271
55.7M
          0.2904801297290076f,
272
55.7M
          0.0952002265347505f,
273
55.7M
          0.0000000000000000,
274
55.7M
          0.3675398009862011f,
275
55.7M
          -0.4921585901373891f,
276
55.7M
          0.2462710772207514f,
277
55.7M
          -0.0794670660591026f,
278
55.7M
          0.3623817333531165f,
279
55.7M
          -0.4351904965232251f,
280
55.7M
      },
281
55.7M
      {
282
55.7M
          0.2500000000000000,
283
55.7M
          -0.1014005039375375f,
284
55.7M
          0.0000000000000000,
285
55.7M
          -0.4706702258572528f,
286
55.7M
          0.0000000000000000,
287
55.7M
          -0.0643507165794627f,
288
55.7M
          0.1107416575309343f,
289
55.7M
          0.0000000000000000,
290
55.7M
          -0.1627234014286617f,
291
55.7M
          0.0000000000000000,
292
55.7M
          0.0000000000000000,
293
55.7M
          0.0000000000000000,
294
55.7M
          0.1488339922711357f,
295
55.7M
          0.4972464710953509f,
296
55.7M
          0.2921026642334879f,
297
55.7M
          0.5550443808910661f,
298
55.7M
      },
299
55.7M
      {
300
55.7M
          0.2500000000000000,
301
55.7M
          -0.1014005039375377f,
302
55.7M
          0.1137907446044809f,
303
55.7M
          -0.1464291867126764f,
304
55.7M
          0.0000000000000000,
305
55.7M
          -0.0643507165794628f,
306
55.7M
          0.0829816309488205f,
307
55.7M
          -0.2388977352334460f,
308
55.7M
          -0.3531238544981630f,
309
55.7M
          -0.4082482904638630f,
310
55.7M
          0.4826689115059883f,
311
55.7M
          0.1741941265991622f,
312
55.7M
          -0.0476868035022925f,
313
55.7M
          0.1253805944856366f,
314
55.7M
          -0.4326608024727445f,
315
55.7M
          -0.2546827712406646f,
316
55.7M
      },
317
55.7M
      {
318
55.7M
          0.2500000000000000,
319
55.7M
          -0.1014005039375377f,
320
55.7M
          -0.4444481661973438f,
321
55.7M
          0.3085497062849487f,
322
55.7M
          0.0000000000000000,
323
55.7M
          -0.0643507165794628f,
324
55.7M
          0.1585450355183970f,
325
55.7M
          -0.5112616136592012f,
326
55.7M
          0.2579236279634129f,
327
55.7M
          0.0000000000000000,
328
55.7M
          -0.0812611176717504f,
329
55.7M
          -0.1856718091610990f,
330
55.7M
          -0.3416446842253373f,
331
55.7M
          0.3302282550303805f,
332
55.7M
          0.0702790691196282f,
333
55.7M
          -0.0741750459581023f,
334
55.7M
      },
335
55.7M
      {
336
55.7M
          0.2500000000000000,
337
55.7M
          -0.1014005039375376f,
338
55.7M
          -0.2929100136981264f,
339
55.7M
          0.0000000000000000,
340
55.7M
          0.0000000000000000,
341
55.7M
          -0.0643507165794627f,
342
55.7M
          0.3935103426921022f,
343
55.7M
          0.0657870154914254f,
344
55.7M
          0.0000000000000000,
345
55.7M
          0.4082482904638634f,
346
55.7M
          0.3078822139579031f,
347
55.7M
          0.3852501370925211f,
348
55.7M
          -0.0857401903551927f,
349
55.7M
          -0.4613374887461554f,
350
55.7M
          0.0000000000000000,
351
55.7M
          0.2191868483885728f,
352
55.7M
      },
353
55.7M
      {
354
55.7M
          0.2500000000000000,
355
55.7M
          -0.1014005039375376f,
356
55.7M
          -0.1137907446044814f,
357
55.7M
          -0.1464291867126654f,
358
55.7M
          0.0000000000000000,
359
55.7M
          -0.0643507165794627f,
360
55.7M
          0.0829816309488214f,
361
55.7M
          0.2388977352334547f,
362
55.7M
          -0.3531238544981624f,
363
55.7M
          0.4082482904638630f,
364
55.7M
          -0.4826689115059858f,
365
55.7M
          -0.1741941265991621f,
366
55.7M
          -0.0476868035022928f,
367
55.7M
          0.1253805944856431f,
368
55.7M
          -0.4326608024727457f,
369
55.7M
          -0.2546827712406641f,
370
55.7M
      },
371
55.7M
      {
372
55.7M
          0.2500000000000000,
373
55.7M
          -0.1014005039375374f,
374
55.7M
          0.0000000000000000,
375
55.7M
          0.4251149611657548f,
376
55.7M
          0.0000000000000000,
377
55.7M
          -0.0643507165794626f,
378
55.7M
          -0.4517556589999480f,
379
55.7M
          0.0000000000000000,
380
55.7M
          -0.6035859033230976f,
381
55.7M
          0.0000000000000000,
382
55.7M
          0.0000000000000000,
383
55.7M
          0.0000000000000000,
384
55.7M
          -0.1426608480880724f,
385
55.7M
          -0.1381354035075845f,
386
55.7M
          0.3487520519930227f,
387
55.7M
          0.1135498731499429f,
388
55.7M
      },
389
55.7M
  };
390
391
55.7M
  const HWY_CAPPED(float, 16) d;
392
167M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
111M
    auto scalar = Zero(d);
394
1.89G
    for (size_t j = 0; j < 16; j++) {
395
1.78G
      auto px = Set(d, pixels[j]);
396
1.78G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.78G
      scalar = MulAdd(px, basis, scalar);
398
1.78G
    }
399
111M
    Store(scalar, d, coeffs + i);
400
111M
  }
401
55.7M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
56.4M
                            float* JXL_RESTRICT coefficients) {
411
56.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
56.4M
  size_t afv_x = afv_kind & 1;
413
56.4M
  size_t afv_y = afv_kind / 2;
414
56.4M
  HWY_ALIGN float block[4 * 8] = {};
415
282M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.12G
    for (size_t ix = 0; ix < 4; ix++) {
417
903M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
903M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
903M
    }
420
225M
  }
421
  // AFV coefficients in (even, even) positions.
422
56.4M
  HWY_ALIGN float coeff[4 * 4];
423
56.4M
  AFVDCT4x4(block, coeff);
424
282M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.12G
    for (size_t ix = 0; ix < 4; ix++) {
426
903M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
903M
    }
428
225M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
56.4M
  ComputeScaledDCT<4, 4>()(
431
56.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
56.4M
              pixels_stride),
433
56.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
282M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.03G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.80G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.80G
    }
439
225M
  }
440
  // 4x8 DCT of the other half of the block.
441
56.4M
  ComputeScaledDCT<4, 8>()(
442
56.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
56.4M
      block, scratch_space);
444
282M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.03G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.80G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.80G
    }
448
225M
  }
449
56.4M
  float block00 = coefficients[0] * 0.25f;
450
56.4M
  float block01 = coefficients[1];
451
56.4M
  float block10 = coefficients[8];
452
56.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
56.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
56.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
56.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
114k
                            float* JXL_RESTRICT coefficients) {
411
114k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
114k
  size_t afv_x = afv_kind & 1;
413
114k
  size_t afv_y = afv_kind / 2;
414
114k
  HWY_ALIGN float block[4 * 8] = {};
415
572k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.29M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.83M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.83M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.83M
    }
420
458k
  }
421
  // AFV coefficients in (even, even) positions.
422
114k
  HWY_ALIGN float coeff[4 * 4];
423
114k
  AFVDCT4x4(block, coeff);
424
572k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.29M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.83M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.83M
    }
428
458k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
114k
  ComputeScaledDCT<4, 4>()(
431
114k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
114k
              pixels_stride),
433
114k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
572k
  for (size_t iy = 0; iy < 4; iy++) {
436
4.12M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.66M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.66M
    }
439
458k
  }
440
  // 4x8 DCT of the other half of the block.
441
114k
  ComputeScaledDCT<4, 8>()(
442
114k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
114k
      block, scratch_space);
444
572k
  for (size_t iy = 0; iy < 4; iy++) {
445
4.12M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.66M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.66M
    }
448
458k
  }
449
114k
  float block00 = coefficients[0] * 0.25f;
450
114k
  float block01 = coefficients[1];
451
114k
  float block10 = coefficients[8];
452
114k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
114k
  coefficients[1] = (block00 - block01) * 0.5f;
454
114k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
114k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
64.3k
                            float* JXL_RESTRICT coefficients) {
411
64.3k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
64.3k
  size_t afv_x = afv_kind & 1;
413
64.3k
  size_t afv_y = afv_kind / 2;
414
64.3k
  HWY_ALIGN float block[4 * 8] = {};
415
321k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.28M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.03M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.03M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.03M
    }
420
257k
  }
421
  // AFV coefficients in (even, even) positions.
422
64.3k
  HWY_ALIGN float coeff[4 * 4];
423
64.3k
  AFVDCT4x4(block, coeff);
424
321k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.28M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.03M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.03M
    }
428
257k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
64.3k
  ComputeScaledDCT<4, 4>()(
431
64.3k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
64.3k
              pixels_stride),
433
64.3k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
321k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.31M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.06M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.06M
    }
439
257k
  }
440
  // 4x8 DCT of the other half of the block.
441
64.3k
  ComputeScaledDCT<4, 8>()(
442
64.3k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
64.3k
      block, scratch_space);
444
321k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.31M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.06M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.06M
    }
448
257k
  }
449
64.3k
  float block00 = coefficients[0] * 0.25f;
450
64.3k
  float block01 = coefficients[1];
451
64.3k
  float block10 = coefficients[8];
452
64.3k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
64.3k
  coefficients[1] = (block00 - block01) * 0.5f;
454
64.3k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
64.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
81.3k
                            float* JXL_RESTRICT coefficients) {
411
81.3k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
81.3k
  size_t afv_x = afv_kind & 1;
413
81.3k
  size_t afv_y = afv_kind / 2;
414
81.3k
  HWY_ALIGN float block[4 * 8] = {};
415
406k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.62M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.30M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.30M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.30M
    }
420
325k
  }
421
  // AFV coefficients in (even, even) positions.
422
81.3k
  HWY_ALIGN float coeff[4 * 4];
423
81.3k
  AFVDCT4x4(block, coeff);
424
406k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.62M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.30M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.30M
    }
428
325k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
81.3k
  ComputeScaledDCT<4, 4>()(
431
81.3k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
81.3k
              pixels_stride),
433
81.3k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
406k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.93M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.60M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.60M
    }
439
325k
  }
440
  // 4x8 DCT of the other half of the block.
441
81.3k
  ComputeScaledDCT<4, 8>()(
442
81.3k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
81.3k
      block, scratch_space);
444
406k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.93M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.60M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.60M
    }
448
325k
  }
449
81.3k
  float block00 = coefficients[0] * 0.25f;
450
81.3k
  float block01 = coefficients[1];
451
81.3k
  float block10 = coefficients[8];
452
81.3k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
81.3k
  coefficients[1] = (block00 - block01) * 0.5f;
454
81.3k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
81.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
82.9k
                            float* JXL_RESTRICT coefficients) {
411
82.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
82.9k
  size_t afv_x = afv_kind & 1;
413
82.9k
  size_t afv_y = afv_kind / 2;
414
82.9k
  HWY_ALIGN float block[4 * 8] = {};
415
414k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.65M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.32M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.32M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.32M
    }
420
331k
  }
421
  // AFV coefficients in (even, even) positions.
422
82.9k
  HWY_ALIGN float coeff[4 * 4];
423
82.9k
  AFVDCT4x4(block, coeff);
424
414k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.65M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.32M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.32M
    }
428
331k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
82.9k
  ComputeScaledDCT<4, 4>()(
431
82.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
82.9k
              pixels_stride),
433
82.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
414k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.98M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.65M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.65M
    }
439
331k
  }
440
  // 4x8 DCT of the other half of the block.
441
82.9k
  ComputeScaledDCT<4, 8>()(
442
82.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
82.9k
      block, scratch_space);
444
414k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.98M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.65M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.65M
    }
448
331k
  }
449
82.9k
  float block00 = coefficients[0] * 0.25f;
450
82.9k
  float block01 = coefficients[1];
451
82.9k
  float block10 = coefficients[8];
452
82.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
82.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
82.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
82.9k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
114k
                            float* JXL_RESTRICT coefficients) {
411
114k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
114k
  size_t afv_x = afv_kind & 1;
413
114k
  size_t afv_y = afv_kind / 2;
414
114k
  HWY_ALIGN float block[4 * 8] = {};
415
572k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.29M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.83M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.83M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.83M
    }
420
458k
  }
421
  // AFV coefficients in (even, even) positions.
422
114k
  HWY_ALIGN float coeff[4 * 4];
423
114k
  AFVDCT4x4(block, coeff);
424
572k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.29M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.83M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.83M
    }
428
458k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
114k
  ComputeScaledDCT<4, 4>()(
431
114k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
114k
              pixels_stride),
433
114k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
572k
  for (size_t iy = 0; iy < 4; iy++) {
436
4.12M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.66M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.66M
    }
439
458k
  }
440
  // 4x8 DCT of the other half of the block.
441
114k
  ComputeScaledDCT<4, 8>()(
442
114k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
114k
      block, scratch_space);
444
572k
  for (size_t iy = 0; iy < 4; iy++) {
445
4.12M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.66M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.66M
    }
448
458k
  }
449
114k
  float block00 = coefficients[0] * 0.25f;
450
114k
  float block01 = coefficients[1];
451
114k
  float block10 = coefficients[8];
452
114k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
114k
  coefficients[1] = (block00 - block01) * 0.5f;
454
114k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
114k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
64.3k
                            float* JXL_RESTRICT coefficients) {
411
64.3k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
64.3k
  size_t afv_x = afv_kind & 1;
413
64.3k
  size_t afv_y = afv_kind / 2;
414
64.3k
  HWY_ALIGN float block[4 * 8] = {};
415
321k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.28M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.03M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.03M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.03M
    }
420
257k
  }
421
  // AFV coefficients in (even, even) positions.
422
64.3k
  HWY_ALIGN float coeff[4 * 4];
423
64.3k
  AFVDCT4x4(block, coeff);
424
321k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.28M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.03M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.03M
    }
428
257k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
64.3k
  ComputeScaledDCT<4, 4>()(
431
64.3k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
64.3k
              pixels_stride),
433
64.3k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
321k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.31M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.06M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.06M
    }
439
257k
  }
440
  // 4x8 DCT of the other half of the block.
441
64.3k
  ComputeScaledDCT<4, 8>()(
442
64.3k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
64.3k
      block, scratch_space);
444
321k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.31M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.06M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.06M
    }
448
257k
  }
449
64.3k
  float block00 = coefficients[0] * 0.25f;
450
64.3k
  float block01 = coefficients[1];
451
64.3k
  float block10 = coefficients[8];
452
64.3k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
64.3k
  coefficients[1] = (block00 - block01) * 0.5f;
454
64.3k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
64.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
81.3k
                            float* JXL_RESTRICT coefficients) {
411
81.3k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
81.3k
  size_t afv_x = afv_kind & 1;
413
81.3k
  size_t afv_y = afv_kind / 2;
414
81.3k
  HWY_ALIGN float block[4 * 8] = {};
415
406k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.62M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.30M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.30M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.30M
    }
420
325k
  }
421
  // AFV coefficients in (even, even) positions.
422
81.3k
  HWY_ALIGN float coeff[4 * 4];
423
81.3k
  AFVDCT4x4(block, coeff);
424
406k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.62M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.30M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.30M
    }
428
325k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
81.3k
  ComputeScaledDCT<4, 4>()(
431
81.3k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
81.3k
              pixels_stride),
433
81.3k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
406k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.93M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.60M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.60M
    }
439
325k
  }
440
  // 4x8 DCT of the other half of the block.
441
81.3k
  ComputeScaledDCT<4, 8>()(
442
81.3k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
81.3k
      block, scratch_space);
444
406k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.93M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.60M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.60M
    }
448
325k
  }
449
81.3k
  float block00 = coefficients[0] * 0.25f;
450
81.3k
  float block01 = coefficients[1];
451
81.3k
  float block10 = coefficients[8];
452
81.3k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
81.3k
  coefficients[1] = (block00 - block01) * 0.5f;
454
81.3k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
81.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
82.9k
                            float* JXL_RESTRICT coefficients) {
411
82.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
82.9k
  size_t afv_x = afv_kind & 1;
413
82.9k
  size_t afv_y = afv_kind / 2;
414
82.9k
  HWY_ALIGN float block[4 * 8] = {};
415
414k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.65M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.32M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.32M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.32M
    }
420
331k
  }
421
  // AFV coefficients in (even, even) positions.
422
82.9k
  HWY_ALIGN float coeff[4 * 4];
423
82.9k
  AFVDCT4x4(block, coeff);
424
414k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.65M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.32M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.32M
    }
428
331k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
82.9k
  ComputeScaledDCT<4, 4>()(
431
82.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
82.9k
              pixels_stride),
433
82.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
414k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.98M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.65M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.65M
    }
439
331k
  }
440
  // 4x8 DCT of the other half of the block.
441
82.9k
  ComputeScaledDCT<4, 8>()(
442
82.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
82.9k
      block, scratch_space);
444
414k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.98M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.65M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.65M
    }
448
331k
  }
449
82.9k
  float block00 = coefficients[0] * 0.25f;
450
82.9k
  float block01 = coefficients[1];
451
82.9k
  float block10 = coefficients[8];
452
82.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
82.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
82.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
82.9k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.9M
                            float* JXL_RESTRICT coefficients) {
411
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.9M
  size_t afv_x = afv_kind & 1;
413
13.9M
  size_t afv_y = afv_kind / 2;
414
13.9M
  HWY_ALIGN float block[4 * 8] = {};
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
223M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
223M
    }
420
55.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.9M
  HWY_ALIGN float coeff[4 * 4];
423
13.9M
  AFVDCT4x4(block, coeff);
424
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
278M
    for (size_t ix = 0; ix < 4; ix++) {
426
223M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
223M
    }
428
55.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.9M
  ComputeScaledDCT<4, 4>()(
431
13.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.9M
              pixels_stride),
433
13.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
501M
    for (size_t ix = 0; ix < 8; ix++) {
437
446M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
446M
    }
439
55.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.9M
  ComputeScaledDCT<4, 8>()(
442
13.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.9M
      block, scratch_space);
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
446M
    }
448
55.7M
  }
449
13.9M
  float block00 = coefficients[0] * 0.25f;
450
13.9M
  float block01 = coefficients[1];
451
13.9M
  float block10 = coefficients[8];
452
13.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.9M
                            float* JXL_RESTRICT coefficients) {
411
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.9M
  size_t afv_x = afv_kind & 1;
413
13.9M
  size_t afv_y = afv_kind / 2;
414
13.9M
  HWY_ALIGN float block[4 * 8] = {};
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
223M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
223M
    }
420
55.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.9M
  HWY_ALIGN float coeff[4 * 4];
423
13.9M
  AFVDCT4x4(block, coeff);
424
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
278M
    for (size_t ix = 0; ix < 4; ix++) {
426
223M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
223M
    }
428
55.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.9M
  ComputeScaledDCT<4, 4>()(
431
13.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.9M
              pixels_stride),
433
13.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
501M
    for (size_t ix = 0; ix < 8; ix++) {
437
446M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
446M
    }
439
55.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.9M
  ComputeScaledDCT<4, 8>()(
442
13.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.9M
      block, scratch_space);
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
446M
    }
448
55.7M
  }
449
13.9M
  float block00 = coefficients[0] * 0.25f;
450
13.9M
  float block01 = coefficients[1];
451
13.9M
  float block10 = coefficients[8];
452
13.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.9M
                            float* JXL_RESTRICT coefficients) {
411
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.9M
  size_t afv_x = afv_kind & 1;
413
13.9M
  size_t afv_y = afv_kind / 2;
414
13.9M
  HWY_ALIGN float block[4 * 8] = {};
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
223M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
223M
    }
420
55.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.9M
  HWY_ALIGN float coeff[4 * 4];
423
13.9M
  AFVDCT4x4(block, coeff);
424
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
278M
    for (size_t ix = 0; ix < 4; ix++) {
426
223M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
223M
    }
428
55.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.9M
  ComputeScaledDCT<4, 4>()(
431
13.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.9M
              pixels_stride),
433
13.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
501M
    for (size_t ix = 0; ix < 8; ix++) {
437
446M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
446M
    }
439
55.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.9M
  ComputeScaledDCT<4, 8>()(
442
13.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.9M
      block, scratch_space);
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
446M
    }
448
55.7M
  }
449
13.9M
  float block00 = coefficients[0] * 0.25f;
450
13.9M
  float block01 = coefficients[1];
451
13.9M
  float block10 = coefficients[8];
452
13.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
13.9M
                            float* JXL_RESTRICT coefficients) {
411
13.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
13.9M
  size_t afv_x = afv_kind & 1;
413
13.9M
  size_t afv_y = afv_kind / 2;
414
13.9M
  HWY_ALIGN float block[4 * 8] = {};
415
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
278M
    for (size_t ix = 0; ix < 4; ix++) {
417
223M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
223M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
223M
    }
420
55.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
13.9M
  HWY_ALIGN float coeff[4 * 4];
423
13.9M
  AFVDCT4x4(block, coeff);
424
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
278M
    for (size_t ix = 0; ix < 4; ix++) {
426
223M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
223M
    }
428
55.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
13.9M
  ComputeScaledDCT<4, 4>()(
431
13.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
13.9M
              pixels_stride),
433
13.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
501M
    for (size_t ix = 0; ix < 8; ix++) {
437
446M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
446M
    }
439
55.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
13.9M
  ComputeScaledDCT<4, 8>()(
442
13.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
13.9M
      block, scratch_space);
444
69.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
501M
    for (size_t ix = 0; ix < 8; ix++) {
446
446M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
446M
    }
448
55.7M
  }
449
13.9M
  float block00 = coefficients[0] * 0.25f;
450
13.9M
  float block01 = coefficients[1];
451
13.9M
  float block10 = coefficients[8];
452
13.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
13.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
13.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
13.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
201M
                                          float* JXL_RESTRICT scratch_space) {
462
201M
  using Type = AcStrategyType;
463
201M
  switch (strategy) {
464
15.5M
    case Type::IDENTITY: {
465
46.7M
      for (size_t y = 0; y < 2; y++) {
466
93.4M
        for (size_t x = 0; x < 2; x++) {
467
62.3M
          float block_dc = 0;
468
311M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.24G
            for (size_t ix = 0; ix < 4; ix++) {
470
997M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
997M
            }
472
249M
          }
473
62.3M
          block_dc *= 1.0f / 16;
474
311M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.24G
            for (size_t ix = 0; ix < 4; ix++) {
476
997M
              if (ix == 1 && iy == 1) continue;
477
934M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
934M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
934M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
934M
            }
481
249M
          }
482
62.3M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
62.3M
          coefficients[y * 8 + x] = block_dc;
484
62.3M
        }
485
31.1M
      }
486
15.5M
      float block00 = coefficients[0];
487
15.5M
      float block01 = coefficients[1];
488
15.5M
      float block10 = coefficients[8];
489
15.5M
      float block11 = coefficients[9];
490
15.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
15.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
15.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
15.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
15.5M
      break;
495
0
    }
496
14.3M
    case Type::DCT8X4: {
497
42.9M
      for (size_t x = 0; x < 2; x++) {
498
28.6M
        HWY_ALIGN float block[4 * 8];
499
28.6M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
28.6M
                                 scratch_space);
501
143M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.03G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
915M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
915M
          }
506
114M
        }
507
28.6M
      }
508
14.3M
      float block0 = coefficients[0];
509
14.3M
      float block1 = coefficients[8];
510
14.3M
      coefficients[0] = (block0 + block1) * 0.5f;
511
14.3M
      coefficients[8] = (block0 - block1) * 0.5f;
512
14.3M
      break;
513
0
    }
514
14.0M
    case Type::DCT4X8: {
515
42.2M
      for (size_t y = 0; y < 2; y++) {
516
28.1M
        HWY_ALIGN float block[4 * 8];
517
28.1M
        ComputeScaledDCT<4, 8>()(
518
28.1M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
28.1M
            scratch_space);
520
140M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.01G
          for (size_t ix = 0; ix < 8; ix++) {
522
902M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
902M
          }
524
112M
        }
525
28.1M
      }
526
14.0M
      float block0 = coefficients[0];
527
14.0M
      float block1 = coefficients[8];
528
14.0M
      coefficients[0] = (block0 + block1) * 0.5f;
529
14.0M
      coefficients[8] = (block0 - block1) * 0.5f;
530
14.0M
      break;
531
0
    }
532
13.9M
    case Type::DCT4X4: {
533
41.8M
      for (size_t y = 0; y < 2; y++) {
534
83.6M
        for (size_t x = 0; x < 2; x++) {
535
55.7M
          HWY_ALIGN float block[4 * 4];
536
55.7M
          ComputeScaledDCT<4, 4>()(
537
55.7M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
55.7M
              block, scratch_space);
539
278M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
541
892M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
892M
            }
543
223M
          }
544
55.7M
        }
545
27.8M
      }
546
13.9M
      float block00 = coefficients[0];
547
13.9M
      float block01 = coefficients[1];
548
13.9M
      float block10 = coefficients[8];
549
13.9M
      float block11 = coefficients[9];
550
13.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
13.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
13.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
13.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
13.9M
      break;
555
0
    }
556
18.4M
    case Type::DCT2X2: {
557
18.4M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
18.4M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
18.4M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
18.4M
      break;
561
0
    }
562
6.05M
    case Type::DCT16X16: {
563
6.05M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.05M
                                 scratch_space);
565
6.05M
      break;
566
0
    }
567
11.7M
    case Type::DCT16X8: {
568
11.7M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
11.7M
                                scratch_space);
570
11.7M
      break;
571
0
    }
572
11.7M
    case Type::DCT8X16: {
573
11.7M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
11.7M
                                scratch_space);
575
11.7M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.33M
    case Type::DCT32X16: {
588
2.33M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.33M
                                 scratch_space);
590
2.33M
      break;
591
0
    }
592
2.30M
    case Type::DCT16X32: {
593
2.30M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.30M
                                 scratch_space);
595
2.30M
      break;
596
0
    }
597
1.32M
    case Type::DCT32X32: {
598
1.32M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.32M
                                 scratch_space);
600
1.32M
      break;
601
0
    }
602
31.3M
    case Type::DCT: {
603
31.3M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
31.3M
                               scratch_space);
605
31.3M
      break;
606
0
    }
607
14.1M
    case Type::AFV0: {
608
14.1M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
14.1M
      break;
610
0
    }
611
14.0M
    case Type::AFV1: {
612
14.0M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.0M
      break;
614
0
    }
615
14.1M
    case Type::AFV2: {
616
14.1M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
14.1M
      break;
618
0
    }
619
14.1M
    case Type::AFV3: {
620
14.1M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.1M
      break;
622
0
    }
623
301k
    case Type::DCT64X64: {
624
301k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
301k
                                 scratch_space);
626
301k
      break;
627
0
    }
628
707k
    case Type::DCT64X32: {
629
707k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
707k
                                 scratch_space);
631
707k
      break;
632
0
    }
633
425k
    case Type::DCT32X64: {
634
425k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
425k
                                 scratch_space);
636
425k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
201M
  }
669
201M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
6.41M
                                          float* JXL_RESTRICT scratch_space) {
462
6.41M
  using Type = AcStrategyType;
463
6.41M
  switch (strategy) {
464
817k
    case Type::IDENTITY: {
465
2.45M
      for (size_t y = 0; y < 2; y++) {
466
4.90M
        for (size_t x = 0; x < 2; x++) {
467
3.27M
          float block_dc = 0;
468
16.3M
          for (size_t iy = 0; iy < 4; iy++) {
469
65.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
52.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
52.3M
            }
472
13.0M
          }
473
3.27M
          block_dc *= 1.0f / 16;
474
16.3M
          for (size_t iy = 0; iy < 4; iy++) {
475
65.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
52.3M
              if (ix == 1 && iy == 1) continue;
477
49.0M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
49.0M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
49.0M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
49.0M
            }
481
13.0M
          }
482
3.27M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.27M
          coefficients[y * 8 + x] = block_dc;
484
3.27M
        }
485
1.63M
      }
486
817k
      float block00 = coefficients[0];
487
817k
      float block01 = coefficients[1];
488
817k
      float block10 = coefficients[8];
489
817k
      float block11 = coefficients[9];
490
817k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
817k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
817k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
817k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
817k
      break;
495
0
    }
496
182k
    case Type::DCT8X4: {
497
546k
      for (size_t x = 0; x < 2; x++) {
498
364k
        HWY_ALIGN float block[4 * 8];
499
364k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
364k
                                 scratch_space);
501
1.82M
        for (size_t iy = 0; iy < 4; iy++) {
502
13.1M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
11.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
11.6M
          }
506
1.45M
        }
507
364k
      }
508
182k
      float block0 = coefficients[0];
509
182k
      float block1 = coefficients[8];
510
182k
      coefficients[0] = (block0 + block1) * 0.5f;
511
182k
      coefficients[8] = (block0 - block1) * 0.5f;
512
182k
      break;
513
0
    }
514
77.5k
    case Type::DCT4X8: {
515
232k
      for (size_t y = 0; y < 2; y++) {
516
155k
        HWY_ALIGN float block[4 * 8];
517
155k
        ComputeScaledDCT<4, 8>()(
518
155k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
155k
            scratch_space);
520
775k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.58M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.96M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.96M
          }
524
620k
        }
525
155k
      }
526
77.5k
      float block0 = coefficients[0];
527
77.5k
      float block1 = coefficients[8];
528
77.5k
      coefficients[0] = (block0 + block1) * 0.5f;
529
77.5k
      coefficients[8] = (block0 - block1) * 0.5f;
530
77.5k
      break;
531
0
    }
532
234
    case Type::DCT4X4: {
533
702
      for (size_t y = 0; y < 2; y++) {
534
1.40k
        for (size_t x = 0; x < 2; x++) {
535
936
          HWY_ALIGN float block[4 * 4];
536
936
          ComputeScaledDCT<4, 4>()(
537
936
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
936
              block, scratch_space);
539
4.68k
          for (size_t iy = 0; iy < 4; iy++) {
540
18.7k
            for (size_t ix = 0; ix < 4; ix++) {
541
14.9k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
14.9k
            }
543
3.74k
          }
544
936
        }
545
468
      }
546
234
      float block00 = coefficients[0];
547
234
      float block01 = coefficients[1];
548
234
      float block10 = coefficients[8];
549
234
      float block11 = coefficients[9];
550
234
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
234
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
234
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
234
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
234
      break;
555
0
    }
556
2.25M
    case Type::DCT2X2: {
557
2.25M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.25M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.25M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.25M
      break;
561
0
    }
562
181k
    case Type::DCT16X16: {
563
181k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
181k
                                 scratch_space);
565
181k
      break;
566
0
    }
567
257k
    case Type::DCT16X8: {
568
257k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
257k
                                scratch_space);
570
257k
      break;
571
0
    }
572
276k
    case Type::DCT8X16: {
573
276k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
276k
                                scratch_space);
575
276k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
57.7k
    case Type::DCT32X16: {
588
57.7k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
57.7k
                                 scratch_space);
590
57.7k
      break;
591
0
    }
592
57.7k
    case Type::DCT16X32: {
593
57.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
57.7k
                                 scratch_space);
595
57.7k
      break;
596
0
    }
597
99.5k
    case Type::DCT32X32: {
598
99.5k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
99.5k
                                 scratch_space);
600
99.5k
      break;
601
0
    }
602
1.73M
    case Type::DCT: {
603
1.73M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.73M
                               scratch_space);
605
1.73M
      break;
606
0
    }
607
114k
    case Type::AFV0: {
608
114k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
114k
      break;
610
0
    }
611
64.3k
    case Type::AFV1: {
612
64.3k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
64.3k
      break;
614
0
    }
615
81.3k
    case Type::AFV2: {
616
81.3k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
81.3k
      break;
618
0
    }
619
82.9k
    case Type::AFV3: {
620
82.9k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
82.9k
      break;
622
0
    }
623
54.5k
    case Type::DCT64X64: {
624
54.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
54.5k
                                 scratch_space);
626
54.5k
      break;
627
0
    }
628
16.2k
    case Type::DCT64X32: {
629
16.2k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
16.2k
                                 scratch_space);
631
16.2k
      break;
632
0
    }
633
6.41k
    case Type::DCT32X64: {
634
6.41k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.41k
                                 scratch_space);
636
6.41k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
6.41M
  }
669
6.41M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
20.3M
                                          float* JXL_RESTRICT scratch_space) {
462
20.3M
  using Type = AcStrategyType;
463
20.3M
  switch (strategy) {
464
817k
    case Type::IDENTITY: {
465
2.45M
      for (size_t y = 0; y < 2; y++) {
466
4.90M
        for (size_t x = 0; x < 2; x++) {
467
3.27M
          float block_dc = 0;
468
16.3M
          for (size_t iy = 0; iy < 4; iy++) {
469
65.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
52.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
52.3M
            }
472
13.0M
          }
473
3.27M
          block_dc *= 1.0f / 16;
474
16.3M
          for (size_t iy = 0; iy < 4; iy++) {
475
65.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
52.3M
              if (ix == 1 && iy == 1) continue;
477
49.0M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
49.0M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
49.0M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
49.0M
            }
481
13.0M
          }
482
3.27M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.27M
          coefficients[y * 8 + x] = block_dc;
484
3.27M
        }
485
1.63M
      }
486
817k
      float block00 = coefficients[0];
487
817k
      float block01 = coefficients[1];
488
817k
      float block10 = coefficients[8];
489
817k
      float block11 = coefficients[9];
490
817k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
817k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
817k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
817k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
817k
      break;
495
0
    }
496
182k
    case Type::DCT8X4: {
497
546k
      for (size_t x = 0; x < 2; x++) {
498
364k
        HWY_ALIGN float block[4 * 8];
499
364k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
364k
                                 scratch_space);
501
1.82M
        for (size_t iy = 0; iy < 4; iy++) {
502
13.1M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
11.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
11.6M
          }
506
1.45M
        }
507
364k
      }
508
182k
      float block0 = coefficients[0];
509
182k
      float block1 = coefficients[8];
510
182k
      coefficients[0] = (block0 + block1) * 0.5f;
511
182k
      coefficients[8] = (block0 - block1) * 0.5f;
512
182k
      break;
513
0
    }
514
77.5k
    case Type::DCT4X8: {
515
232k
      for (size_t y = 0; y < 2; y++) {
516
155k
        HWY_ALIGN float block[4 * 8];
517
155k
        ComputeScaledDCT<4, 8>()(
518
155k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
155k
            scratch_space);
520
775k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.58M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.96M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.96M
          }
524
620k
        }
525
155k
      }
526
77.5k
      float block0 = coefficients[0];
527
77.5k
      float block1 = coefficients[8];
528
77.5k
      coefficients[0] = (block0 + block1) * 0.5f;
529
77.5k
      coefficients[8] = (block0 - block1) * 0.5f;
530
77.5k
      break;
531
0
    }
532
234
    case Type::DCT4X4: {
533
702
      for (size_t y = 0; y < 2; y++) {
534
1.40k
        for (size_t x = 0; x < 2; x++) {
535
936
          HWY_ALIGN float block[4 * 4];
536
936
          ComputeScaledDCT<4, 4>()(
537
936
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
936
              block, scratch_space);
539
4.68k
          for (size_t iy = 0; iy < 4; iy++) {
540
18.7k
            for (size_t ix = 0; ix < 4; ix++) {
541
14.9k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
14.9k
            }
543
3.74k
          }
544
936
        }
545
468
      }
546
234
      float block00 = coefficients[0];
547
234
      float block01 = coefficients[1];
548
234
      float block10 = coefficients[8];
549
234
      float block11 = coefficients[9];
550
234
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
234
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
234
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
234
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
234
      break;
555
0
    }
556
2.25M
    case Type::DCT2X2: {
557
2.25M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.25M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.25M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.25M
      break;
561
0
    }
562
181k
    case Type::DCT16X16: {
563
181k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
181k
                                 scratch_space);
565
181k
      break;
566
0
    }
567
257k
    case Type::DCT16X8: {
568
257k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
257k
                                scratch_space);
570
257k
      break;
571
0
    }
572
276k
    case Type::DCT8X16: {
573
276k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
276k
                                scratch_space);
575
276k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
57.7k
    case Type::DCT32X16: {
588
57.7k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
57.7k
                                 scratch_space);
590
57.7k
      break;
591
0
    }
592
57.7k
    case Type::DCT16X32: {
593
57.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
57.7k
                                 scratch_space);
595
57.7k
      break;
596
0
    }
597
99.5k
    case Type::DCT32X32: {
598
99.5k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
99.5k
                                 scratch_space);
600
99.5k
      break;
601
0
    }
602
15.6M
    case Type::DCT: {
603
15.6M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
15.6M
                               scratch_space);
605
15.6M
      break;
606
0
    }
607
114k
    case Type::AFV0: {
608
114k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
114k
      break;
610
0
    }
611
64.3k
    case Type::AFV1: {
612
64.3k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
64.3k
      break;
614
0
    }
615
81.3k
    case Type::AFV2: {
616
81.3k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
81.3k
      break;
618
0
    }
619
82.9k
    case Type::AFV3: {
620
82.9k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
82.9k
      break;
622
0
    }
623
54.5k
    case Type::DCT64X64: {
624
54.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
54.5k
                                 scratch_space);
626
54.5k
      break;
627
0
    }
628
16.2k
    case Type::DCT64X32: {
629
16.2k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
16.2k
                                 scratch_space);
631
16.2k
      break;
632
0
    }
633
6.41k
    case Type::DCT32X64: {
634
6.41k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.41k
                                 scratch_space);
636
6.41k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
20.3M
  }
669
20.3M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
174M
                                          float* JXL_RESTRICT scratch_space) {
462
174M
  using Type = AcStrategyType;
463
174M
  switch (strategy) {
464
13.9M
    case Type::IDENTITY: {
465
41.8M
      for (size_t y = 0; y < 2; y++) {
466
83.6M
        for (size_t x = 0; x < 2; x++) {
467
55.7M
          float block_dc = 0;
468
278M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
470
892M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
892M
            }
472
223M
          }
473
55.7M
          block_dc *= 1.0f / 16;
474
278M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
476
892M
              if (ix == 1 && iy == 1) continue;
477
836M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
836M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
836M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
836M
            }
481
223M
          }
482
55.7M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
55.7M
          coefficients[y * 8 + x] = block_dc;
484
55.7M
        }
485
27.8M
      }
486
13.9M
      float block00 = coefficients[0];
487
13.9M
      float block01 = coefficients[1];
488
13.9M
      float block10 = coefficients[8];
489
13.9M
      float block11 = coefficients[9];
490
13.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
13.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
13.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
13.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
13.9M
      break;
495
0
    }
496
13.9M
    case Type::DCT8X4: {
497
41.8M
      for (size_t x = 0; x < 2; x++) {
498
27.8M
        HWY_ALIGN float block[4 * 8];
499
27.8M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
27.8M
                                 scratch_space);
501
139M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.00G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
892M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
892M
          }
506
111M
        }
507
27.8M
      }
508
13.9M
      float block0 = coefficients[0];
509
13.9M
      float block1 = coefficients[8];
510
13.9M
      coefficients[0] = (block0 + block1) * 0.5f;
511
13.9M
      coefficients[8] = (block0 - block1) * 0.5f;
512
13.9M
      break;
513
0
    }
514
13.9M
    case Type::DCT4X8: {
515
41.8M
      for (size_t y = 0; y < 2; y++) {
516
27.8M
        HWY_ALIGN float block[4 * 8];
517
27.8M
        ComputeScaledDCT<4, 8>()(
518
27.8M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
27.8M
            scratch_space);
520
139M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.00G
          for (size_t ix = 0; ix < 8; ix++) {
522
892M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
892M
          }
524
111M
        }
525
27.8M
      }
526
13.9M
      float block0 = coefficients[0];
527
13.9M
      float block1 = coefficients[8];
528
13.9M
      coefficients[0] = (block0 + block1) * 0.5f;
529
13.9M
      coefficients[8] = (block0 - block1) * 0.5f;
530
13.9M
      break;
531
0
    }
532
13.9M
    case Type::DCT4X4: {
533
41.8M
      for (size_t y = 0; y < 2; y++) {
534
83.6M
        for (size_t x = 0; x < 2; x++) {
535
55.7M
          HWY_ALIGN float block[4 * 4];
536
55.7M
          ComputeScaledDCT<4, 4>()(
537
55.7M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
55.7M
              block, scratch_space);
539
278M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.11G
            for (size_t ix = 0; ix < 4; ix++) {
541
892M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
892M
            }
543
223M
          }
544
55.7M
        }
545
27.8M
      }
546
13.9M
      float block00 = coefficients[0];
547
13.9M
      float block01 = coefficients[1];
548
13.9M
      float block10 = coefficients[8];
549
13.9M
      float block11 = coefficients[9];
550
13.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
13.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
13.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
13.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
13.9M
      break;
555
0
    }
556
13.9M
    case Type::DCT2X2: {
557
13.9M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
13.9M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
13.9M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
13.9M
      break;
561
0
    }
562
5.69M
    case Type::DCT16X16: {
563
5.69M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.69M
                                 scratch_space);
565
5.69M
      break;
566
0
    }
567
11.2M
    case Type::DCT16X8: {
568
11.2M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
11.2M
                                scratch_space);
570
11.2M
      break;
571
0
    }
572
11.2M
    case Type::DCT8X16: {
573
11.2M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
11.2M
                                scratch_space);
575
11.2M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.21M
    case Type::DCT32X16: {
588
2.21M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.21M
                                 scratch_space);
590
2.21M
      break;
591
0
    }
592
2.19M
    case Type::DCT16X32: {
593
2.19M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.19M
                                 scratch_space);
595
2.19M
      break;
596
0
    }
597
1.12M
    case Type::DCT32X32: {
598
1.12M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.12M
                                 scratch_space);
600
1.12M
      break;
601
0
    }
602
13.9M
    case Type::DCT: {
603
13.9M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
13.9M
                               scratch_space);
605
13.9M
      break;
606
0
    }
607
13.9M
    case Type::AFV0: {
608
13.9M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
13.9M
      break;
610
0
    }
611
13.9M
    case Type::AFV1: {
612
13.9M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
13.9M
      break;
614
0
    }
615
13.9M
    case Type::AFV2: {
616
13.9M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
13.9M
      break;
618
0
    }
619
13.9M
    case Type::AFV3: {
620
13.9M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
13.9M
      break;
622
0
    }
623
192k
    case Type::DCT64X64: {
624
192k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
192k
                                 scratch_space);
626
192k
      break;
627
0
    }
628
674k
    case Type::DCT64X32: {
629
674k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
674k
                                 scratch_space);
631
674k
      break;
632
0
    }
633
412k
    case Type::DCT32X64: {
634
412k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
412k
                                 scratch_space);
636
412k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
174M
  }
669
174M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
26.7M
                                              float* scratch_space) {
676
26.7M
  using Type = AcStrategyType;
677
26.7M
  switch (strategy) {
678
515k
    case Type::DCT16X8: {
679
515k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
515k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
515k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
515k
      break;
683
0
    }
684
552k
    case Type::DCT8X16: {
685
552k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
552k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
552k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
552k
      break;
689
0
    }
690
363k
    case Type::DCT16X16: {
691
363k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
363k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
363k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
363k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
115k
    case Type::DCT32X16: {
709
115k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
115k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
115k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
115k
      break;
713
0
    }
714
115k
    case Type::DCT16X32: {
715
115k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
115k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
115k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
115k
      break;
719
0
    }
720
199k
    case Type::DCT32X32: {
721
199k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
199k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
199k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
199k
      break;
725
0
    }
726
32.4k
    case Type::DCT64X32: {
727
32.4k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
32.4k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
32.4k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
32.4k
      break;
731
0
    }
732
12.8k
    case Type::DCT32X64: {
733
12.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
12.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
12.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
12.8k
      break;
737
0
    }
738
109k
    case Type::DCT64X64: {
739
109k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
109k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
109k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
109k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
17.4M
    case Type::DCT:
787
21.9M
    case Type::DCT2X2:
788
21.9M
    case Type::DCT4X4:
789
22.0M
    case Type::DCT4X8:
790
22.4M
    case Type::DCT8X4:
791
22.6M
    case Type::AFV0:
792
22.8M
    case Type::AFV1:
793
22.9M
    case Type::AFV2:
794
23.1M
    case Type::AFV3:
795
24.7M
    case Type::IDENTITY:
796
24.7M
      dc[0] = block[0];
797
24.7M
      break;
798
26.7M
  }
799
26.7M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
6.41M
                                              float* scratch_space) {
676
6.41M
  using Type = AcStrategyType;
677
6.41M
  switch (strategy) {
678
257k
    case Type::DCT16X8: {
679
257k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
257k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
257k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
257k
      break;
683
0
    }
684
276k
    case Type::DCT8X16: {
685
276k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
276k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
276k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
276k
      break;
689
0
    }
690
181k
    case Type::DCT16X16: {
691
181k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
181k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
181k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
181k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
57.7k
    case Type::DCT32X16: {
709
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
57.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
57.7k
      break;
713
0
    }
714
57.7k
    case Type::DCT16X32: {
715
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
57.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
57.7k
      break;
719
0
    }
720
99.5k
    case Type::DCT32X32: {
721
99.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
99.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
99.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
99.5k
      break;
725
0
    }
726
16.2k
    case Type::DCT64X32: {
727
16.2k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
16.2k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
16.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
16.2k
      break;
731
0
    }
732
6.41k
    case Type::DCT32X64: {
733
6.41k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.41k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.41k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.41k
      break;
737
0
    }
738
54.5k
    case Type::DCT64X64: {
739
54.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
54.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
54.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
54.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.73M
    case Type::DCT:
787
3.98M
    case Type::DCT2X2:
788
3.99M
    case Type::DCT4X4:
789
4.06M
    case Type::DCT4X8:
790
4.24M
    case Type::DCT8X4:
791
4.36M
    case Type::AFV0:
792
4.42M
    case Type::AFV1:
793
4.51M
    case Type::AFV2:
794
4.59M
    case Type::AFV3:
795
5.41M
    case Type::IDENTITY:
796
5.41M
      dc[0] = block[0];
797
5.41M
      break;
798
6.41M
  }
799
6.41M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
20.3M
                                              float* scratch_space) {
676
20.3M
  using Type = AcStrategyType;
677
20.3M
  switch (strategy) {
678
257k
    case Type::DCT16X8: {
679
257k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
257k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
257k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
257k
      break;
683
0
    }
684
276k
    case Type::DCT8X16: {
685
276k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
276k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
276k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
276k
      break;
689
0
    }
690
181k
    case Type::DCT16X16: {
691
181k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
181k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
181k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
181k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
57.7k
    case Type::DCT32X16: {
709
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
57.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
57.7k
      break;
713
0
    }
714
57.7k
    case Type::DCT16X32: {
715
57.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
57.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
57.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
57.7k
      break;
719
0
    }
720
99.5k
    case Type::DCT32X32: {
721
99.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
99.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
99.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
99.5k
      break;
725
0
    }
726
16.2k
    case Type::DCT64X32: {
727
16.2k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
16.2k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
16.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
16.2k
      break;
731
0
    }
732
6.41k
    case Type::DCT32X64: {
733
6.41k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.41k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.41k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.41k
      break;
737
0
    }
738
54.5k
    case Type::DCT64X64: {
739
54.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
54.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
54.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
54.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
15.6M
    case Type::DCT:
787
17.9M
    case Type::DCT2X2:
788
17.9M
    case Type::DCT4X4:
789
18.0M
    case Type::DCT4X8:
790
18.1M
    case Type::DCT8X4:
791
18.3M
    case Type::AFV0:
792
18.3M
    case Type::AFV1:
793
18.4M
    case Type::AFV2:
794
18.5M
    case Type::AFV3:
795
19.3M
    case Type::IDENTITY:
796
19.3M
      dc[0] = block[0];
797
19.3M
      break;
798
20.3M
  }
799
20.3M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_