Coverage Report

Created: 2025-08-12 07:37

/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
180k
                                   const size_t output_stride, float* scratch) {
40
180k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
180k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
180k
  float* block = scratch;
43
180k
  if (ROWS < COLS) {
44
136k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
294k
      for (size_t x = 0; x < LF_COLS; x++) {
46
218k
        block[y * COLS + x] = input[y * input_stride + x] *
47
218k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
218k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
218k
      }
50
76.1k
    }
51
119k
  } else {
52
398k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.26M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
990k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
990k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
990k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
990k
      }
58
278k
    }
59
119k
  }
60
61
180k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
180k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
180k
                                  scratch_space);
64
180k
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.5k
                                   const size_t output_stride, float* scratch) {
40
19.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.5k
  float* block = scratch;
43
19.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.5k
  } else {
52
39.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
58.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
39.1k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
39.1k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
39.1k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
39.1k
      }
58
19.5k
    }
59
19.5k
  }
60
61
19.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.5k
                                  scratch_space);
64
19.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
22.3k
                                   const size_t output_stride, float* scratch) {
40
22.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
22.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
22.3k
  float* block = scratch;
43
22.3k
  if (ROWS < COLS) {
44
44.6k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
66.9k
      for (size_t x = 0; x < LF_COLS; x++) {
46
44.6k
        block[y * COLS + x] = input[y * input_stride + x] *
47
44.6k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
44.6k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
44.6k
      }
50
22.3k
    }
51
22.3k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
22.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
22.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
22.3k
                                  scratch_space);
64
22.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
16.7k
                                   const size_t output_stride, float* scratch) {
40
16.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
16.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
16.7k
  float* block = scratch;
43
16.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
16.7k
  } else {
52
50.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
100k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
67.1k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
67.1k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
67.1k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
67.1k
      }
58
33.5k
    }
59
16.7k
  }
60
61
16.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
16.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
16.7k
                                  scratch_space);
64
16.7k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.44k
                                   const size_t output_stride, float* scratch) {
40
6.44k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.44k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.44k
  float* block = scratch;
43
6.44k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.44k
  } else {
52
19.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
64.4k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
51.5k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
51.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
51.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
51.5k
      }
58
12.8k
    }
59
6.44k
  }
60
61
6.44k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.44k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.44k
                                  scratch_space);
64
6.44k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.68k
                                   const size_t output_stride, float* scratch) {
40
7.68k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.68k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.68k
  float* block = scratch;
43
7.68k
  if (ROWS < COLS) {
44
23.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
76.8k
      for (size_t x = 0; x < LF_COLS; x++) {
46
61.5k
        block[y * COLS + x] = input[y * input_stride + x] *
47
61.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
61.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
61.5k
      }
50
15.3k
    }
51
7.68k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.68k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.68k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.68k
                                  scratch_space);
64
7.68k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
15.6k
                                   const size_t output_stride, float* scratch) {
40
15.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
15.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
15.6k
  float* block = scratch;
43
15.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
15.6k
  } else {
52
78.0k
    for (size_t y = 0; y < LF_COLS; y++) {
53
312k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
249k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
249k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
249k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
249k
      }
58
62.4k
    }
59
15.6k
  }
60
61
15.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
15.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
15.6k
                                  scratch_space);
64
15.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
198
                                   const size_t output_stride, float* scratch) {
40
198
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
198
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
198
  float* block = scratch;
43
198
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
198
  } else {
52
990
    for (size_t y = 0; y < LF_COLS; y++) {
53
7.12k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
6.33k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
6.33k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
6.33k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
6.33k
      }
58
792
    }
59
198
  }
60
61
198
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
198
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
198
                                  scratch_space);
64
198
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
96
                                   const size_t output_stride, float* scratch) {
40
96
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
96
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
96
  float* block = scratch;
43
96
  if (ROWS < COLS) {
44
480
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.45k
      for (size_t x = 0; x < LF_COLS; x++) {
46
3.07k
        block[y * COLS + x] = input[y * input_stride + x] *
47
3.07k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
3.07k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
3.07k
      }
50
384
    }
51
96
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
96
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
96
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
96
                                  scratch_space);
64
96
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
1.26k
                                   const size_t output_stride, float* scratch) {
40
1.26k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.26k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.26k
  float* block = scratch;
43
1.26k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
1.26k
  } else {
52
11.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
91.3k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
81.2k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
81.2k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
81.2k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
81.2k
      }
58
10.1k
    }
59
1.26k
  }
60
61
1.26k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.26k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.26k
                                  scratch_space);
64
1.26k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.5k
                                   const size_t output_stride, float* scratch) {
40
19.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.5k
  float* block = scratch;
43
19.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.5k
  } else {
52
39.1k
    for (size_t y = 0; y < LF_COLS; y++) {
53
58.7k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
39.1k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
39.1k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
39.1k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
39.1k
      }
58
19.5k
    }
59
19.5k
  }
60
61
19.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.5k
                                  scratch_space);
64
19.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
22.3k
                                   const size_t output_stride, float* scratch) {
40
22.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
22.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
22.3k
  float* block = scratch;
43
22.3k
  if (ROWS < COLS) {
44
44.6k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
66.9k
      for (size_t x = 0; x < LF_COLS; x++) {
46
44.6k
        block[y * COLS + x] = input[y * input_stride + x] *
47
44.6k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
44.6k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
44.6k
      }
50
22.3k
    }
51
22.3k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
22.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
22.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
22.3k
                                  scratch_space);
64
22.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
16.7k
                                   const size_t output_stride, float* scratch) {
40
16.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
16.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
16.7k
  float* block = scratch;
43
16.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
16.7k
  } else {
52
50.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
100k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
67.1k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
67.1k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
67.1k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
67.1k
      }
58
33.5k
    }
59
16.7k
  }
60
61
16.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
16.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
16.7k
                                  scratch_space);
64
16.7k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.44k
                                   const size_t output_stride, float* scratch) {
40
6.44k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.44k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.44k
  float* block = scratch;
43
6.44k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.44k
  } else {
52
19.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
64.4k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
51.5k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
51.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
51.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
51.5k
      }
58
12.8k
    }
59
6.44k
  }
60
61
6.44k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.44k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.44k
                                  scratch_space);
64
6.44k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.68k
                                   const size_t output_stride, float* scratch) {
40
7.68k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.68k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.68k
  float* block = scratch;
43
7.68k
  if (ROWS < COLS) {
44
23.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
76.8k
      for (size_t x = 0; x < LF_COLS; x++) {
46
61.5k
        block[y * COLS + x] = input[y * input_stride + x] *
47
61.5k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
61.5k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
61.5k
      }
50
15.3k
    }
51
7.68k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.68k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.68k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.68k
                                  scratch_space);
64
7.68k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
15.6k
                                   const size_t output_stride, float* scratch) {
40
15.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
15.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
15.6k
  float* block = scratch;
43
15.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
15.6k
  } else {
52
78.0k
    for (size_t y = 0; y < LF_COLS; y++) {
53
312k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
249k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
249k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
249k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
249k
      }
58
62.4k
    }
59
15.6k
  }
60
61
15.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
15.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
15.6k
                                  scratch_space);
64
15.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
198
                                   const size_t output_stride, float* scratch) {
40
198
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
198
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
198
  float* block = scratch;
43
198
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
198
  } else {
52
990
    for (size_t y = 0; y < LF_COLS; y++) {
53
7.12k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
6.33k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
6.33k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
6.33k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
6.33k
      }
58
792
    }
59
198
  }
60
61
198
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
198
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
198
                                  scratch_space);
64
198
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
96
                                   const size_t output_stride, float* scratch) {
40
96
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
96
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
96
  float* block = scratch;
43
96
  if (ROWS < COLS) {
44
480
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.45k
      for (size_t x = 0; x < LF_COLS; x++) {
46
3.07k
        block[y * COLS + x] = input[y * input_stride + x] *
47
3.07k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
3.07k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
3.07k
      }
50
384
    }
51
96
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
96
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
96
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
96
                                  scratch_space);
64
96
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
1.26k
                                   const size_t output_stride, float* scratch) {
40
1.26k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.26k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.26k
  float* block = scratch;
43
1.26k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
1.26k
  } else {
52
11.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
91.3k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
81.2k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
81.2k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
81.2k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
81.2k
      }
58
10.1k
    }
59
1.26k
  }
60
61
1.26k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.26k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.26k
                                  scratch_space);
64
1.26k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
3.99M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
3.99M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
3.99M
  static_assert(S % 2 == 0, "S should be even");
70
3.99M
  float temp[kDCTBlockSize];
71
3.99M
  constexpr size_t num_2x2 = S / 2;
72
13.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
27.9M
      float c00 = block[y * 2 * stride + x * 2];
75
27.9M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
27.9M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
27.9M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
27.9M
      float r00 = c00 + c01 + c10 + c11;
79
27.9M
      float r01 = c00 + c01 - c10 - c11;
80
27.9M
      float r10 = c00 - c01 + c10 - c11;
81
27.9M
      float r11 = c00 - c01 - c10 + c11;
82
27.9M
      r00 *= 0.25f;
83
27.9M
      r01 *= 0.25f;
84
27.9M
      r10 *= 0.25f;
85
27.9M
      r11 *= 0.25f;
86
27.9M
      temp[y * kBlockDim + x] = r00;
87
27.9M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
27.9M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
27.9M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
27.9M
    }
91
9.31M
  }
92
22.6M
  for (size_t y = 0; y < S; y++) {
93
130M
    for (size_t x = 0; x < S; x++) {
94
111M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
111M
    }
96
18.6M
  }
97
3.99M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
676k
  for (size_t y = 0; y < num_2x2; y++) {
73
2.70M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.16M
      float c00 = block[y * 2 * stride + x * 2];
75
2.16M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.16M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.16M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.16M
      float r00 = c00 + c01 + c10 + c11;
79
2.16M
      float r01 = c00 + c01 - c10 - c11;
80
2.16M
      float r10 = c00 - c01 + c10 - c11;
81
2.16M
      float r11 = c00 - c01 - c10 + c11;
82
2.16M
      r00 *= 0.25f;
83
2.16M
      r01 *= 0.25f;
84
2.16M
      r10 *= 0.25f;
85
2.16M
      r11 *= 0.25f;
86
2.16M
      temp[y * kBlockDim + x] = r00;
87
2.16M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.16M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.16M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.16M
    }
91
541k
  }
92
1.21M
  for (size_t y = 0; y < S; y++) {
93
9.74M
    for (size_t x = 0; x < S; x++) {
94
8.65M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.65M
    }
96
1.08M
  }
97
135k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
405k
  for (size_t y = 0; y < num_2x2; y++) {
73
811k
    for (size_t x = 0; x < num_2x2; x++) {
74
541k
      float c00 = block[y * 2 * stride + x * 2];
75
541k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
541k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
541k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
541k
      float r00 = c00 + c01 + c10 + c11;
79
541k
      float r01 = c00 + c01 - c10 - c11;
80
541k
      float r10 = c00 - c01 + c10 - c11;
81
541k
      float r11 = c00 - c01 - c10 + c11;
82
541k
      r00 *= 0.25f;
83
541k
      r01 *= 0.25f;
84
541k
      r10 *= 0.25f;
85
541k
      r11 *= 0.25f;
86
541k
      temp[y * kBlockDim + x] = r00;
87
541k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
541k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
541k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
541k
    }
91
270k
  }
92
676k
  for (size_t y = 0; y < S; y++) {
93
2.70M
    for (size_t x = 0; x < S; x++) {
94
2.16M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
2.16M
    }
96
541k
  }
97
135k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
270k
  for (size_t y = 0; y < num_2x2; y++) {
73
270k
    for (size_t x = 0; x < num_2x2; x++) {
74
135k
      float c00 = block[y * 2 * stride + x * 2];
75
135k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
135k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
135k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
135k
      float r00 = c00 + c01 + c10 + c11;
79
135k
      float r01 = c00 + c01 - c10 - c11;
80
135k
      float r10 = c00 - c01 + c10 - c11;
81
135k
      float r11 = c00 - c01 - c10 + c11;
82
135k
      r00 *= 0.25f;
83
135k
      r01 *= 0.25f;
84
135k
      r10 *= 0.25f;
85
135k
      r11 *= 0.25f;
86
135k
      temp[y * kBlockDim + x] = r00;
87
135k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
135k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
135k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
135k
    }
91
135k
  }
92
405k
  for (size_t y = 0; y < S; y++) {
93
811k
    for (size_t x = 0; x < S; x++) {
94
541k
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
541k
    }
96
270k
  }
97
135k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
676k
  for (size_t y = 0; y < num_2x2; y++) {
73
2.70M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.16M
      float c00 = block[y * 2 * stride + x * 2];
75
2.16M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.16M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.16M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.16M
      float r00 = c00 + c01 + c10 + c11;
79
2.16M
      float r01 = c00 + c01 - c10 - c11;
80
2.16M
      float r10 = c00 - c01 + c10 - c11;
81
2.16M
      float r11 = c00 - c01 - c10 + c11;
82
2.16M
      r00 *= 0.25f;
83
2.16M
      r01 *= 0.25f;
84
2.16M
      r10 *= 0.25f;
85
2.16M
      r11 *= 0.25f;
86
2.16M
      temp[y * kBlockDim + x] = r00;
87
2.16M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.16M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.16M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.16M
    }
91
541k
  }
92
1.21M
  for (size_t y = 0; y < S; y++) {
93
9.74M
    for (size_t x = 0; x < S; x++) {
94
8.65M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
8.65M
    }
96
1.08M
  }
97
135k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
405k
  for (size_t y = 0; y < num_2x2; y++) {
73
811k
    for (size_t x = 0; x < num_2x2; x++) {
74
541k
      float c00 = block[y * 2 * stride + x * 2];
75
541k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
541k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
541k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
541k
      float r00 = c00 + c01 + c10 + c11;
79
541k
      float r01 = c00 + c01 - c10 - c11;
80
541k
      float r10 = c00 - c01 + c10 - c11;
81
541k
      float r11 = c00 - c01 - c10 + c11;
82
541k
      r00 *= 0.25f;
83
541k
      r01 *= 0.25f;
84
541k
      r10 *= 0.25f;
85
541k
      r11 *= 0.25f;
86
541k
      temp[y * kBlockDim + x] = r00;
87
541k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
541k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
541k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
541k
    }
91
270k
  }
92
676k
  for (size_t y = 0; y < S; y++) {
93
2.70M
    for (size_t x = 0; x < S; x++) {
94
2.16M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
2.16M
    }
96
541k
  }
97
135k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
135k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
135k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
135k
  static_assert(S % 2 == 0, "S should be even");
70
135k
  float temp[kDCTBlockSize];
71
135k
  constexpr size_t num_2x2 = S / 2;
72
270k
  for (size_t y = 0; y < num_2x2; y++) {
73
270k
    for (size_t x = 0; x < num_2x2; x++) {
74
135k
      float c00 = block[y * 2 * stride + x * 2];
75
135k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
135k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
135k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
135k
      float r00 = c00 + c01 + c10 + c11;
79
135k
      float r01 = c00 + c01 - c10 - c11;
80
135k
      float r10 = c00 - c01 + c10 - c11;
81
135k
      float r11 = c00 - c01 - c10 + c11;
82
135k
      r00 *= 0.25f;
83
135k
      r01 *= 0.25f;
84
135k
      r10 *= 0.25f;
85
135k
      r11 *= 0.25f;
86
135k
      temp[y * kBlockDim + x] = r00;
87
135k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
135k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
135k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
135k
    }
91
135k
  }
92
405k
  for (size_t y = 0; y < S; y++) {
93
811k
    for (size_t x = 0; x < S; x++) {
94
541k
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
541k
    }
96
270k
  }
97
135k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
5.29M
  for (size_t y = 0; y < num_2x2; y++) {
73
21.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.9M
      float c00 = block[y * 2 * stride + x * 2];
75
16.9M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.9M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.9M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.9M
      float r00 = c00 + c01 + c10 + c11;
79
16.9M
      float r01 = c00 + c01 - c10 - c11;
80
16.9M
      float r10 = c00 - c01 + c10 - c11;
81
16.9M
      float r11 = c00 - c01 - c10 + c11;
82
16.9M
      r00 *= 0.25f;
83
16.9M
      r01 *= 0.25f;
84
16.9M
      r10 *= 0.25f;
85
16.9M
      r11 *= 0.25f;
86
16.9M
      temp[y * kBlockDim + x] = r00;
87
16.9M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.9M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.9M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.9M
    }
91
4.23M
  }
92
9.53M
  for (size_t y = 0; y < S; y++) {
93
76.2M
    for (size_t x = 0; x < S; x++) {
94
67.8M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
67.8M
    }
96
8.47M
  }
97
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
3.17M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.35M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.23M
      float c00 = block[y * 2 * stride + x * 2];
75
4.23M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.23M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.23M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.23M
      float r00 = c00 + c01 + c10 + c11;
79
4.23M
      float r01 = c00 + c01 - c10 - c11;
80
4.23M
      float r10 = c00 - c01 + c10 - c11;
81
4.23M
      float r11 = c00 - c01 - c10 + c11;
82
4.23M
      r00 *= 0.25f;
83
4.23M
      r01 *= 0.25f;
84
4.23M
      r10 *= 0.25f;
85
4.23M
      r11 *= 0.25f;
86
4.23M
      temp[y * kBlockDim + x] = r00;
87
4.23M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.23M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.23M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.23M
    }
91
2.11M
  }
92
5.29M
  for (size_t y = 0; y < S; y++) {
93
21.1M
    for (size_t x = 0; x < S; x++) {
94
16.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.9M
    }
96
4.23M
  }
97
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.05M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.05M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.05M
  static_assert(S % 2 == 0, "S should be even");
70
1.05M
  float temp[kDCTBlockSize];
71
1.05M
  constexpr size_t num_2x2 = S / 2;
72
2.11M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.11M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.05M
      float c00 = block[y * 2 * stride + x * 2];
75
1.05M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.05M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.05M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.05M
      float r00 = c00 + c01 + c10 + c11;
79
1.05M
      float r01 = c00 + c01 - c10 - c11;
80
1.05M
      float r10 = c00 - c01 + c10 - c11;
81
1.05M
      float r11 = c00 - c01 - c10 + c11;
82
1.05M
      r00 *= 0.25f;
83
1.05M
      r01 *= 0.25f;
84
1.05M
      r10 *= 0.25f;
85
1.05M
      r11 *= 0.25f;
86
1.05M
      temp[y * kBlockDim + x] = r00;
87
1.05M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.05M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.05M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.05M
    }
91
1.05M
  }
92
3.17M
  for (size_t y = 0; y < S; y++) {
93
6.35M
    for (size_t x = 0; x < S; x++) {
94
4.23M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.23M
    }
96
2.11M
  }
97
1.05M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
4.32M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
4.32M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
4.32M
      {
102
4.32M
          0.2500000000000000,
103
4.32M
          0.8769029297991420f,
104
4.32M
          0.0000000000000000,
105
4.32M
          0.0000000000000000,
106
4.32M
          0.0000000000000000,
107
4.32M
          -0.4105377591765233f,
108
4.32M
          0.0000000000000000,
109
4.32M
          0.0000000000000000,
110
4.32M
          0.0000000000000000,
111
4.32M
          0.0000000000000000,
112
4.32M
          0.0000000000000000,
113
4.32M
          0.0000000000000000,
114
4.32M
          0.0000000000000000,
115
4.32M
          0.0000000000000000,
116
4.32M
          0.0000000000000000,
117
4.32M
          0.0000000000000000,
118
4.32M
      },
119
4.32M
      {
120
4.32M
          0.2500000000000000,
121
4.32M
          0.2206518106944235f,
122
4.32M
          0.0000000000000000,
123
4.32M
          0.0000000000000000,
124
4.32M
          -0.7071067811865474f,
125
4.32M
          0.6235485373547691f,
126
4.32M
          0.0000000000000000,
127
4.32M
          0.0000000000000000,
128
4.32M
          0.0000000000000000,
129
4.32M
          0.0000000000000000,
130
4.32M
          0.0000000000000000,
131
4.32M
          0.0000000000000000,
132
4.32M
          0.0000000000000000,
133
4.32M
          0.0000000000000000,
134
4.32M
          0.0000000000000000,
135
4.32M
          0.0000000000000000,
136
4.32M
      },
137
4.32M
      {
138
4.32M
          0.2500000000000000,
139
4.32M
          -0.1014005039375376f,
140
4.32M
          0.4067007583026075f,
141
4.32M
          -0.2125574805828875f,
142
4.32M
          0.0000000000000000,
143
4.32M
          -0.0643507165794627f,
144
4.32M
          -0.4517556589999482f,
145
4.32M
          -0.3046847507248690f,
146
4.32M
          0.3017929516615495f,
147
4.32M
          0.4082482904638627f,
148
4.32M
          0.1747866975480809f,
149
4.32M
          -0.2110560104933578f,
150
4.32M
          -0.1426608480880726f,
151
4.32M
          -0.1381354035075859f,
152
4.32M
          -0.1743760259965107f,
153
4.32M
          0.1135498731499434f,
154
4.32M
      },
155
4.32M
      {
156
4.32M
          0.2500000000000000,
157
4.32M
          -0.1014005039375375f,
158
4.32M
          0.4444481661973445f,
159
4.32M
          0.3085497062849767f,
160
4.32M
          0.0000000000000000f,
161
4.32M
          -0.0643507165794627f,
162
4.32M
          0.1585450355184006f,
163
4.32M
          0.5112616136591823f,
164
4.32M
          0.2579236279634118f,
165
4.32M
          0.0000000000000000,
166
4.32M
          0.0812611176717539f,
167
4.32M
          0.1856718091610980f,
168
4.32M
          -0.3416446842253372f,
169
4.32M
          0.3302282550303788f,
170
4.32M
          0.0702790691196284f,
171
4.32M
          -0.0741750459581035f,
172
4.32M
      },
173
4.32M
      {
174
4.32M
          0.2500000000000000,
175
4.32M
          0.2206518106944236f,
176
4.32M
          0.0000000000000000,
177
4.32M
          0.0000000000000000,
178
4.32M
          0.7071067811865476f,
179
4.32M
          0.6235485373547694f,
180
4.32M
          0.0000000000000000,
181
4.32M
          0.0000000000000000,
182
4.32M
          0.0000000000000000,
183
4.32M
          0.0000000000000000,
184
4.32M
          0.0000000000000000,
185
4.32M
          0.0000000000000000,
186
4.32M
          0.0000000000000000,
187
4.32M
          0.0000000000000000,
188
4.32M
          0.0000000000000000,
189
4.32M
          0.0000000000000000,
190
4.32M
      },
191
4.32M
      {
192
4.32M
          0.2500000000000000,
193
4.32M
          -0.1014005039375378f,
194
4.32M
          0.0000000000000000,
195
4.32M
          0.4706702258572536f,
196
4.32M
          0.0000000000000000,
197
4.32M
          -0.0643507165794628f,
198
4.32M
          -0.0403851516082220f,
199
4.32M
          0.0000000000000000,
200
4.32M
          0.1627234014286620f,
201
4.32M
          0.0000000000000000,
202
4.32M
          0.0000000000000000,
203
4.32M
          0.0000000000000000,
204
4.32M
          0.7367497537172237f,
205
4.32M
          0.0875511500058708f,
206
4.32M
          -0.2921026642334881f,
207
4.32M
          0.1940289303259434f,
208
4.32M
      },
209
4.32M
      {
210
4.32M
          0.2500000000000000,
211
4.32M
          -0.1014005039375377f,
212
4.32M
          0.1957439937204294f,
213
4.32M
          -0.1621205195722993f,
214
4.32M
          0.0000000000000000,
215
4.32M
          -0.0643507165794628f,
216
4.32M
          0.0074182263792424f,
217
4.32M
          -0.2904801297289980f,
218
4.32M
          0.0952002265347504f,
219
4.32M
          0.0000000000000000,
220
4.32M
          -0.3675398009862027f,
221
4.32M
          0.4921585901373873f,
222
4.32M
          0.2462710772207515f,
223
4.32M
          -0.0794670660590957f,
224
4.32M
          0.3623817333531167f,
225
4.32M
          -0.4351904965232280f,
226
4.32M
      },
227
4.32M
      {
228
4.32M
          0.2500000000000000,
229
4.32M
          -0.1014005039375376f,
230
4.32M
          0.2929100136981264f,
231
4.32M
          0.0000000000000000,
232
4.32M
          0.0000000000000000,
233
4.32M
          -0.0643507165794627f,
234
4.32M
          0.3935103426921017f,
235
4.32M
          -0.0657870154914280f,
236
4.32M
          0.0000000000000000,
237
4.32M
          -0.4082482904638628f,
238
4.32M
          -0.3078822139579090f,
239
4.32M
          -0.3852501370925192f,
240
4.32M
          -0.0857401903551931f,
241
4.32M
          -0.4613374887461511f,
242
4.32M
          0.0000000000000000,
243
4.32M
          0.2191868483885747f,
244
4.32M
      },
245
4.32M
      {
246
4.32M
          0.2500000000000000,
247
4.32M
          -0.1014005039375376f,
248
4.32M
          -0.4067007583026072f,
249
4.32M
          -0.2125574805828705f,
250
4.32M
          0.0000000000000000,
251
4.32M
          -0.0643507165794627f,
252
4.32M
          -0.4517556589999464f,
253
4.32M
          0.3046847507248840f,
254
4.32M
          0.3017929516615503f,
255
4.32M
          -0.4082482904638635f,
256
4.32M
          -0.1747866975480813f,
257
4.32M
          0.2110560104933581f,
258
4.32M
          -0.1426608480880734f,
259
4.32M
          -0.1381354035075829f,
260
4.32M
          -0.1743760259965108f,
261
4.32M
          0.1135498731499426f,
262
4.32M
      },
263
4.32M
      {
264
4.32M
          0.2500000000000000,
265
4.32M
          -0.1014005039375377f,
266
4.32M
          -0.1957439937204287f,
267
4.32M
          -0.1621205195722833f,
268
4.32M
          0.0000000000000000,
269
4.32M
          -0.0643507165794628f,
270
4.32M
          0.0074182263792444f,
271
4.32M
          0.2904801297290076f,
272
4.32M
          0.0952002265347505f,
273
4.32M
          0.0000000000000000,
274
4.32M
          0.3675398009862011f,
275
4.32M
          -0.4921585901373891f,
276
4.32M
          0.2462710772207514f,
277
4.32M
          -0.0794670660591026f,
278
4.32M
          0.3623817333531165f,
279
4.32M
          -0.4351904965232251f,
280
4.32M
      },
281
4.32M
      {
282
4.32M
          0.2500000000000000,
283
4.32M
          -0.1014005039375375f,
284
4.32M
          0.0000000000000000,
285
4.32M
          -0.4706702258572528f,
286
4.32M
          0.0000000000000000,
287
4.32M
          -0.0643507165794627f,
288
4.32M
          0.1107416575309343f,
289
4.32M
          0.0000000000000000,
290
4.32M
          -0.1627234014286617f,
291
4.32M
          0.0000000000000000,
292
4.32M
          0.0000000000000000,
293
4.32M
          0.0000000000000000,
294
4.32M
          0.1488339922711357f,
295
4.32M
          0.4972464710953509f,
296
4.32M
          0.2921026642334879f,
297
4.32M
          0.5550443808910661f,
298
4.32M
      },
299
4.32M
      {
300
4.32M
          0.2500000000000000,
301
4.32M
          -0.1014005039375377f,
302
4.32M
          0.1137907446044809f,
303
4.32M
          -0.1464291867126764f,
304
4.32M
          0.0000000000000000,
305
4.32M
          -0.0643507165794628f,
306
4.32M
          0.0829816309488205f,
307
4.32M
          -0.2388977352334460f,
308
4.32M
          -0.3531238544981630f,
309
4.32M
          -0.4082482904638630f,
310
4.32M
          0.4826689115059883f,
311
4.32M
          0.1741941265991622f,
312
4.32M
          -0.0476868035022925f,
313
4.32M
          0.1253805944856366f,
314
4.32M
          -0.4326608024727445f,
315
4.32M
          -0.2546827712406646f,
316
4.32M
      },
317
4.32M
      {
318
4.32M
          0.2500000000000000,
319
4.32M
          -0.1014005039375377f,
320
4.32M
          -0.4444481661973438f,
321
4.32M
          0.3085497062849487f,
322
4.32M
          0.0000000000000000,
323
4.32M
          -0.0643507165794628f,
324
4.32M
          0.1585450355183970f,
325
4.32M
          -0.5112616136592012f,
326
4.32M
          0.2579236279634129f,
327
4.32M
          0.0000000000000000,
328
4.32M
          -0.0812611176717504f,
329
4.32M
          -0.1856718091610990f,
330
4.32M
          -0.3416446842253373f,
331
4.32M
          0.3302282550303805f,
332
4.32M
          0.0702790691196282f,
333
4.32M
          -0.0741750459581023f,
334
4.32M
      },
335
4.32M
      {
336
4.32M
          0.2500000000000000,
337
4.32M
          -0.1014005039375376f,
338
4.32M
          -0.2929100136981264f,
339
4.32M
          0.0000000000000000,
340
4.32M
          0.0000000000000000,
341
4.32M
          -0.0643507165794627f,
342
4.32M
          0.3935103426921022f,
343
4.32M
          0.0657870154914254f,
344
4.32M
          0.0000000000000000,
345
4.32M
          0.4082482904638634f,
346
4.32M
          0.3078822139579031f,
347
4.32M
          0.3852501370925211f,
348
4.32M
          -0.0857401903551927f,
349
4.32M
          -0.4613374887461554f,
350
4.32M
          0.0000000000000000,
351
4.32M
          0.2191868483885728f,
352
4.32M
      },
353
4.32M
      {
354
4.32M
          0.2500000000000000,
355
4.32M
          -0.1014005039375376f,
356
4.32M
          -0.1137907446044814f,
357
4.32M
          -0.1464291867126654f,
358
4.32M
          0.0000000000000000,
359
4.32M
          -0.0643507165794627f,
360
4.32M
          0.0829816309488214f,
361
4.32M
          0.2388977352334547f,
362
4.32M
          -0.3531238544981624f,
363
4.32M
          0.4082482904638630f,
364
4.32M
          -0.4826689115059858f,
365
4.32M
          -0.1741941265991621f,
366
4.32M
          -0.0476868035022928f,
367
4.32M
          0.1253805944856431f,
368
4.32M
          -0.4326608024727457f,
369
4.32M
          -0.2546827712406641f,
370
4.32M
      },
371
4.32M
      {
372
4.32M
          0.2500000000000000,
373
4.32M
          -0.1014005039375374f,
374
4.32M
          0.0000000000000000,
375
4.32M
          0.4251149611657548f,
376
4.32M
          0.0000000000000000,
377
4.32M
          -0.0643507165794626f,
378
4.32M
          -0.4517556589999480f,
379
4.32M
          0.0000000000000000,
380
4.32M
          -0.6035859033230976f,
381
4.32M
          0.0000000000000000,
382
4.32M
          0.0000000000000000,
383
4.32M
          0.0000000000000000,
384
4.32M
          -0.1426608480880724f,
385
4.32M
          -0.1381354035075845f,
386
4.32M
          0.3487520519930227f,
387
4.32M
          0.1135498731499429f,
388
4.32M
      },
389
4.32M
  };
390
391
4.32M
  const HWY_CAPPED(float, 16) d;
392
12.9M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
8.65M
    auto scalar = Zero(d);
394
147M
    for (size_t j = 0; j < 16; j++) {
395
138M
      auto px = Set(d, pixels[j]);
396
138M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
138M
      scalar = MulAdd(px, basis, scalar);
398
138M
    }
399
8.65M
    Store(scalar, d, coeffs + i);
400
8.65M
  }
401
4.32M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
44.6k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
44.6k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
44.6k
      {
102
44.6k
          0.2500000000000000,
103
44.6k
          0.8769029297991420f,
104
44.6k
          0.0000000000000000,
105
44.6k
          0.0000000000000000,
106
44.6k
          0.0000000000000000,
107
44.6k
          -0.4105377591765233f,
108
44.6k
          0.0000000000000000,
109
44.6k
          0.0000000000000000,
110
44.6k
          0.0000000000000000,
111
44.6k
          0.0000000000000000,
112
44.6k
          0.0000000000000000,
113
44.6k
          0.0000000000000000,
114
44.6k
          0.0000000000000000,
115
44.6k
          0.0000000000000000,
116
44.6k
          0.0000000000000000,
117
44.6k
          0.0000000000000000,
118
44.6k
      },
119
44.6k
      {
120
44.6k
          0.2500000000000000,
121
44.6k
          0.2206518106944235f,
122
44.6k
          0.0000000000000000,
123
44.6k
          0.0000000000000000,
124
44.6k
          -0.7071067811865474f,
125
44.6k
          0.6235485373547691f,
126
44.6k
          0.0000000000000000,
127
44.6k
          0.0000000000000000,
128
44.6k
          0.0000000000000000,
129
44.6k
          0.0000000000000000,
130
44.6k
          0.0000000000000000,
131
44.6k
          0.0000000000000000,
132
44.6k
          0.0000000000000000,
133
44.6k
          0.0000000000000000,
134
44.6k
          0.0000000000000000,
135
44.6k
          0.0000000000000000,
136
44.6k
      },
137
44.6k
      {
138
44.6k
          0.2500000000000000,
139
44.6k
          -0.1014005039375376f,
140
44.6k
          0.4067007583026075f,
141
44.6k
          -0.2125574805828875f,
142
44.6k
          0.0000000000000000,
143
44.6k
          -0.0643507165794627f,
144
44.6k
          -0.4517556589999482f,
145
44.6k
          -0.3046847507248690f,
146
44.6k
          0.3017929516615495f,
147
44.6k
          0.4082482904638627f,
148
44.6k
          0.1747866975480809f,
149
44.6k
          -0.2110560104933578f,
150
44.6k
          -0.1426608480880726f,
151
44.6k
          -0.1381354035075859f,
152
44.6k
          -0.1743760259965107f,
153
44.6k
          0.1135498731499434f,
154
44.6k
      },
155
44.6k
      {
156
44.6k
          0.2500000000000000,
157
44.6k
          -0.1014005039375375f,
158
44.6k
          0.4444481661973445f,
159
44.6k
          0.3085497062849767f,
160
44.6k
          0.0000000000000000f,
161
44.6k
          -0.0643507165794627f,
162
44.6k
          0.1585450355184006f,
163
44.6k
          0.5112616136591823f,
164
44.6k
          0.2579236279634118f,
165
44.6k
          0.0000000000000000,
166
44.6k
          0.0812611176717539f,
167
44.6k
          0.1856718091610980f,
168
44.6k
          -0.3416446842253372f,
169
44.6k
          0.3302282550303788f,
170
44.6k
          0.0702790691196284f,
171
44.6k
          -0.0741750459581035f,
172
44.6k
      },
173
44.6k
      {
174
44.6k
          0.2500000000000000,
175
44.6k
          0.2206518106944236f,
176
44.6k
          0.0000000000000000,
177
44.6k
          0.0000000000000000,
178
44.6k
          0.7071067811865476f,
179
44.6k
          0.6235485373547694f,
180
44.6k
          0.0000000000000000,
181
44.6k
          0.0000000000000000,
182
44.6k
          0.0000000000000000,
183
44.6k
          0.0000000000000000,
184
44.6k
          0.0000000000000000,
185
44.6k
          0.0000000000000000,
186
44.6k
          0.0000000000000000,
187
44.6k
          0.0000000000000000,
188
44.6k
          0.0000000000000000,
189
44.6k
          0.0000000000000000,
190
44.6k
      },
191
44.6k
      {
192
44.6k
          0.2500000000000000,
193
44.6k
          -0.1014005039375378f,
194
44.6k
          0.0000000000000000,
195
44.6k
          0.4706702258572536f,
196
44.6k
          0.0000000000000000,
197
44.6k
          -0.0643507165794628f,
198
44.6k
          -0.0403851516082220f,
199
44.6k
          0.0000000000000000,
200
44.6k
          0.1627234014286620f,
201
44.6k
          0.0000000000000000,
202
44.6k
          0.0000000000000000,
203
44.6k
          0.0000000000000000,
204
44.6k
          0.7367497537172237f,
205
44.6k
          0.0875511500058708f,
206
44.6k
          -0.2921026642334881f,
207
44.6k
          0.1940289303259434f,
208
44.6k
      },
209
44.6k
      {
210
44.6k
          0.2500000000000000,
211
44.6k
          -0.1014005039375377f,
212
44.6k
          0.1957439937204294f,
213
44.6k
          -0.1621205195722993f,
214
44.6k
          0.0000000000000000,
215
44.6k
          -0.0643507165794628f,
216
44.6k
          0.0074182263792424f,
217
44.6k
          -0.2904801297289980f,
218
44.6k
          0.0952002265347504f,
219
44.6k
          0.0000000000000000,
220
44.6k
          -0.3675398009862027f,
221
44.6k
          0.4921585901373873f,
222
44.6k
          0.2462710772207515f,
223
44.6k
          -0.0794670660590957f,
224
44.6k
          0.3623817333531167f,
225
44.6k
          -0.4351904965232280f,
226
44.6k
      },
227
44.6k
      {
228
44.6k
          0.2500000000000000,
229
44.6k
          -0.1014005039375376f,
230
44.6k
          0.2929100136981264f,
231
44.6k
          0.0000000000000000,
232
44.6k
          0.0000000000000000,
233
44.6k
          -0.0643507165794627f,
234
44.6k
          0.3935103426921017f,
235
44.6k
          -0.0657870154914280f,
236
44.6k
          0.0000000000000000,
237
44.6k
          -0.4082482904638628f,
238
44.6k
          -0.3078822139579090f,
239
44.6k
          -0.3852501370925192f,
240
44.6k
          -0.0857401903551931f,
241
44.6k
          -0.4613374887461511f,
242
44.6k
          0.0000000000000000,
243
44.6k
          0.2191868483885747f,
244
44.6k
      },
245
44.6k
      {
246
44.6k
          0.2500000000000000,
247
44.6k
          -0.1014005039375376f,
248
44.6k
          -0.4067007583026072f,
249
44.6k
          -0.2125574805828705f,
250
44.6k
          0.0000000000000000,
251
44.6k
          -0.0643507165794627f,
252
44.6k
          -0.4517556589999464f,
253
44.6k
          0.3046847507248840f,
254
44.6k
          0.3017929516615503f,
255
44.6k
          -0.4082482904638635f,
256
44.6k
          -0.1747866975480813f,
257
44.6k
          0.2110560104933581f,
258
44.6k
          -0.1426608480880734f,
259
44.6k
          -0.1381354035075829f,
260
44.6k
          -0.1743760259965108f,
261
44.6k
          0.1135498731499426f,
262
44.6k
      },
263
44.6k
      {
264
44.6k
          0.2500000000000000,
265
44.6k
          -0.1014005039375377f,
266
44.6k
          -0.1957439937204287f,
267
44.6k
          -0.1621205195722833f,
268
44.6k
          0.0000000000000000,
269
44.6k
          -0.0643507165794628f,
270
44.6k
          0.0074182263792444f,
271
44.6k
          0.2904801297290076f,
272
44.6k
          0.0952002265347505f,
273
44.6k
          0.0000000000000000,
274
44.6k
          0.3675398009862011f,
275
44.6k
          -0.4921585901373891f,
276
44.6k
          0.2462710772207514f,
277
44.6k
          -0.0794670660591026f,
278
44.6k
          0.3623817333531165f,
279
44.6k
          -0.4351904965232251f,
280
44.6k
      },
281
44.6k
      {
282
44.6k
          0.2500000000000000,
283
44.6k
          -0.1014005039375375f,
284
44.6k
          0.0000000000000000,
285
44.6k
          -0.4706702258572528f,
286
44.6k
          0.0000000000000000,
287
44.6k
          -0.0643507165794627f,
288
44.6k
          0.1107416575309343f,
289
44.6k
          0.0000000000000000,
290
44.6k
          -0.1627234014286617f,
291
44.6k
          0.0000000000000000,
292
44.6k
          0.0000000000000000,
293
44.6k
          0.0000000000000000,
294
44.6k
          0.1488339922711357f,
295
44.6k
          0.4972464710953509f,
296
44.6k
          0.2921026642334879f,
297
44.6k
          0.5550443808910661f,
298
44.6k
      },
299
44.6k
      {
300
44.6k
          0.2500000000000000,
301
44.6k
          -0.1014005039375377f,
302
44.6k
          0.1137907446044809f,
303
44.6k
          -0.1464291867126764f,
304
44.6k
          0.0000000000000000,
305
44.6k
          -0.0643507165794628f,
306
44.6k
          0.0829816309488205f,
307
44.6k
          -0.2388977352334460f,
308
44.6k
          -0.3531238544981630f,
309
44.6k
          -0.4082482904638630f,
310
44.6k
          0.4826689115059883f,
311
44.6k
          0.1741941265991622f,
312
44.6k
          -0.0476868035022925f,
313
44.6k
          0.1253805944856366f,
314
44.6k
          -0.4326608024727445f,
315
44.6k
          -0.2546827712406646f,
316
44.6k
      },
317
44.6k
      {
318
44.6k
          0.2500000000000000,
319
44.6k
          -0.1014005039375377f,
320
44.6k
          -0.4444481661973438f,
321
44.6k
          0.3085497062849487f,
322
44.6k
          0.0000000000000000,
323
44.6k
          -0.0643507165794628f,
324
44.6k
          0.1585450355183970f,
325
44.6k
          -0.5112616136592012f,
326
44.6k
          0.2579236279634129f,
327
44.6k
          0.0000000000000000,
328
44.6k
          -0.0812611176717504f,
329
44.6k
          -0.1856718091610990f,
330
44.6k
          -0.3416446842253373f,
331
44.6k
          0.3302282550303805f,
332
44.6k
          0.0702790691196282f,
333
44.6k
          -0.0741750459581023f,
334
44.6k
      },
335
44.6k
      {
336
44.6k
          0.2500000000000000,
337
44.6k
          -0.1014005039375376f,
338
44.6k
          -0.2929100136981264f,
339
44.6k
          0.0000000000000000,
340
44.6k
          0.0000000000000000,
341
44.6k
          -0.0643507165794627f,
342
44.6k
          0.3935103426921022f,
343
44.6k
          0.0657870154914254f,
344
44.6k
          0.0000000000000000,
345
44.6k
          0.4082482904638634f,
346
44.6k
          0.3078822139579031f,
347
44.6k
          0.3852501370925211f,
348
44.6k
          -0.0857401903551927f,
349
44.6k
          -0.4613374887461554f,
350
44.6k
          0.0000000000000000,
351
44.6k
          0.2191868483885728f,
352
44.6k
      },
353
44.6k
      {
354
44.6k
          0.2500000000000000,
355
44.6k
          -0.1014005039375376f,
356
44.6k
          -0.1137907446044814f,
357
44.6k
          -0.1464291867126654f,
358
44.6k
          0.0000000000000000,
359
44.6k
          -0.0643507165794627f,
360
44.6k
          0.0829816309488214f,
361
44.6k
          0.2388977352334547f,
362
44.6k
          -0.3531238544981624f,
363
44.6k
          0.4082482904638630f,
364
44.6k
          -0.4826689115059858f,
365
44.6k
          -0.1741941265991621f,
366
44.6k
          -0.0476868035022928f,
367
44.6k
          0.1253805944856431f,
368
44.6k
          -0.4326608024727457f,
369
44.6k
          -0.2546827712406641f,
370
44.6k
      },
371
44.6k
      {
372
44.6k
          0.2500000000000000,
373
44.6k
          -0.1014005039375374f,
374
44.6k
          0.0000000000000000,
375
44.6k
          0.4251149611657548f,
376
44.6k
          0.0000000000000000,
377
44.6k
          -0.0643507165794626f,
378
44.6k
          -0.4517556589999480f,
379
44.6k
          0.0000000000000000,
380
44.6k
          -0.6035859033230976f,
381
44.6k
          0.0000000000000000,
382
44.6k
          0.0000000000000000,
383
44.6k
          0.0000000000000000,
384
44.6k
          -0.1426608480880724f,
385
44.6k
          -0.1381354035075845f,
386
44.6k
          0.3487520519930227f,
387
44.6k
          0.1135498731499429f,
388
44.6k
      },
389
44.6k
  };
390
391
44.6k
  const HWY_CAPPED(float, 16) d;
392
133k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
89.2k
    auto scalar = Zero(d);
394
1.51M
    for (size_t j = 0; j < 16; j++) {
395
1.42M
      auto px = Set(d, pixels[j]);
396
1.42M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.42M
      scalar = MulAdd(px, basis, scalar);
398
1.42M
    }
399
89.2k
    Store(scalar, d, coeffs + i);
400
89.2k
  }
401
44.6k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
44.6k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
44.6k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
44.6k
      {
102
44.6k
          0.2500000000000000,
103
44.6k
          0.8769029297991420f,
104
44.6k
          0.0000000000000000,
105
44.6k
          0.0000000000000000,
106
44.6k
          0.0000000000000000,
107
44.6k
          -0.4105377591765233f,
108
44.6k
          0.0000000000000000,
109
44.6k
          0.0000000000000000,
110
44.6k
          0.0000000000000000,
111
44.6k
          0.0000000000000000,
112
44.6k
          0.0000000000000000,
113
44.6k
          0.0000000000000000,
114
44.6k
          0.0000000000000000,
115
44.6k
          0.0000000000000000,
116
44.6k
          0.0000000000000000,
117
44.6k
          0.0000000000000000,
118
44.6k
      },
119
44.6k
      {
120
44.6k
          0.2500000000000000,
121
44.6k
          0.2206518106944235f,
122
44.6k
          0.0000000000000000,
123
44.6k
          0.0000000000000000,
124
44.6k
          -0.7071067811865474f,
125
44.6k
          0.6235485373547691f,
126
44.6k
          0.0000000000000000,
127
44.6k
          0.0000000000000000,
128
44.6k
          0.0000000000000000,
129
44.6k
          0.0000000000000000,
130
44.6k
          0.0000000000000000,
131
44.6k
          0.0000000000000000,
132
44.6k
          0.0000000000000000,
133
44.6k
          0.0000000000000000,
134
44.6k
          0.0000000000000000,
135
44.6k
          0.0000000000000000,
136
44.6k
      },
137
44.6k
      {
138
44.6k
          0.2500000000000000,
139
44.6k
          -0.1014005039375376f,
140
44.6k
          0.4067007583026075f,
141
44.6k
          -0.2125574805828875f,
142
44.6k
          0.0000000000000000,
143
44.6k
          -0.0643507165794627f,
144
44.6k
          -0.4517556589999482f,
145
44.6k
          -0.3046847507248690f,
146
44.6k
          0.3017929516615495f,
147
44.6k
          0.4082482904638627f,
148
44.6k
          0.1747866975480809f,
149
44.6k
          -0.2110560104933578f,
150
44.6k
          -0.1426608480880726f,
151
44.6k
          -0.1381354035075859f,
152
44.6k
          -0.1743760259965107f,
153
44.6k
          0.1135498731499434f,
154
44.6k
      },
155
44.6k
      {
156
44.6k
          0.2500000000000000,
157
44.6k
          -0.1014005039375375f,
158
44.6k
          0.4444481661973445f,
159
44.6k
          0.3085497062849767f,
160
44.6k
          0.0000000000000000f,
161
44.6k
          -0.0643507165794627f,
162
44.6k
          0.1585450355184006f,
163
44.6k
          0.5112616136591823f,
164
44.6k
          0.2579236279634118f,
165
44.6k
          0.0000000000000000,
166
44.6k
          0.0812611176717539f,
167
44.6k
          0.1856718091610980f,
168
44.6k
          -0.3416446842253372f,
169
44.6k
          0.3302282550303788f,
170
44.6k
          0.0702790691196284f,
171
44.6k
          -0.0741750459581035f,
172
44.6k
      },
173
44.6k
      {
174
44.6k
          0.2500000000000000,
175
44.6k
          0.2206518106944236f,
176
44.6k
          0.0000000000000000,
177
44.6k
          0.0000000000000000,
178
44.6k
          0.7071067811865476f,
179
44.6k
          0.6235485373547694f,
180
44.6k
          0.0000000000000000,
181
44.6k
          0.0000000000000000,
182
44.6k
          0.0000000000000000,
183
44.6k
          0.0000000000000000,
184
44.6k
          0.0000000000000000,
185
44.6k
          0.0000000000000000,
186
44.6k
          0.0000000000000000,
187
44.6k
          0.0000000000000000,
188
44.6k
          0.0000000000000000,
189
44.6k
          0.0000000000000000,
190
44.6k
      },
191
44.6k
      {
192
44.6k
          0.2500000000000000,
193
44.6k
          -0.1014005039375378f,
194
44.6k
          0.0000000000000000,
195
44.6k
          0.4706702258572536f,
196
44.6k
          0.0000000000000000,
197
44.6k
          -0.0643507165794628f,
198
44.6k
          -0.0403851516082220f,
199
44.6k
          0.0000000000000000,
200
44.6k
          0.1627234014286620f,
201
44.6k
          0.0000000000000000,
202
44.6k
          0.0000000000000000,
203
44.6k
          0.0000000000000000,
204
44.6k
          0.7367497537172237f,
205
44.6k
          0.0875511500058708f,
206
44.6k
          -0.2921026642334881f,
207
44.6k
          0.1940289303259434f,
208
44.6k
      },
209
44.6k
      {
210
44.6k
          0.2500000000000000,
211
44.6k
          -0.1014005039375377f,
212
44.6k
          0.1957439937204294f,
213
44.6k
          -0.1621205195722993f,
214
44.6k
          0.0000000000000000,
215
44.6k
          -0.0643507165794628f,
216
44.6k
          0.0074182263792424f,
217
44.6k
          -0.2904801297289980f,
218
44.6k
          0.0952002265347504f,
219
44.6k
          0.0000000000000000,
220
44.6k
          -0.3675398009862027f,
221
44.6k
          0.4921585901373873f,
222
44.6k
          0.2462710772207515f,
223
44.6k
          -0.0794670660590957f,
224
44.6k
          0.3623817333531167f,
225
44.6k
          -0.4351904965232280f,
226
44.6k
      },
227
44.6k
      {
228
44.6k
          0.2500000000000000,
229
44.6k
          -0.1014005039375376f,
230
44.6k
          0.2929100136981264f,
231
44.6k
          0.0000000000000000,
232
44.6k
          0.0000000000000000,
233
44.6k
          -0.0643507165794627f,
234
44.6k
          0.3935103426921017f,
235
44.6k
          -0.0657870154914280f,
236
44.6k
          0.0000000000000000,
237
44.6k
          -0.4082482904638628f,
238
44.6k
          -0.3078822139579090f,
239
44.6k
          -0.3852501370925192f,
240
44.6k
          -0.0857401903551931f,
241
44.6k
          -0.4613374887461511f,
242
44.6k
          0.0000000000000000,
243
44.6k
          0.2191868483885747f,
244
44.6k
      },
245
44.6k
      {
246
44.6k
          0.2500000000000000,
247
44.6k
          -0.1014005039375376f,
248
44.6k
          -0.4067007583026072f,
249
44.6k
          -0.2125574805828705f,
250
44.6k
          0.0000000000000000,
251
44.6k
          -0.0643507165794627f,
252
44.6k
          -0.4517556589999464f,
253
44.6k
          0.3046847507248840f,
254
44.6k
          0.3017929516615503f,
255
44.6k
          -0.4082482904638635f,
256
44.6k
          -0.1747866975480813f,
257
44.6k
          0.2110560104933581f,
258
44.6k
          -0.1426608480880734f,
259
44.6k
          -0.1381354035075829f,
260
44.6k
          -0.1743760259965108f,
261
44.6k
          0.1135498731499426f,
262
44.6k
      },
263
44.6k
      {
264
44.6k
          0.2500000000000000,
265
44.6k
          -0.1014005039375377f,
266
44.6k
          -0.1957439937204287f,
267
44.6k
          -0.1621205195722833f,
268
44.6k
          0.0000000000000000,
269
44.6k
          -0.0643507165794628f,
270
44.6k
          0.0074182263792444f,
271
44.6k
          0.2904801297290076f,
272
44.6k
          0.0952002265347505f,
273
44.6k
          0.0000000000000000,
274
44.6k
          0.3675398009862011f,
275
44.6k
          -0.4921585901373891f,
276
44.6k
          0.2462710772207514f,
277
44.6k
          -0.0794670660591026f,
278
44.6k
          0.3623817333531165f,
279
44.6k
          -0.4351904965232251f,
280
44.6k
      },
281
44.6k
      {
282
44.6k
          0.2500000000000000,
283
44.6k
          -0.1014005039375375f,
284
44.6k
          0.0000000000000000,
285
44.6k
          -0.4706702258572528f,
286
44.6k
          0.0000000000000000,
287
44.6k
          -0.0643507165794627f,
288
44.6k
          0.1107416575309343f,
289
44.6k
          0.0000000000000000,
290
44.6k
          -0.1627234014286617f,
291
44.6k
          0.0000000000000000,
292
44.6k
          0.0000000000000000,
293
44.6k
          0.0000000000000000,
294
44.6k
          0.1488339922711357f,
295
44.6k
          0.4972464710953509f,
296
44.6k
          0.2921026642334879f,
297
44.6k
          0.5550443808910661f,
298
44.6k
      },
299
44.6k
      {
300
44.6k
          0.2500000000000000,
301
44.6k
          -0.1014005039375377f,
302
44.6k
          0.1137907446044809f,
303
44.6k
          -0.1464291867126764f,
304
44.6k
          0.0000000000000000,
305
44.6k
          -0.0643507165794628f,
306
44.6k
          0.0829816309488205f,
307
44.6k
          -0.2388977352334460f,
308
44.6k
          -0.3531238544981630f,
309
44.6k
          -0.4082482904638630f,
310
44.6k
          0.4826689115059883f,
311
44.6k
          0.1741941265991622f,
312
44.6k
          -0.0476868035022925f,
313
44.6k
          0.1253805944856366f,
314
44.6k
          -0.4326608024727445f,
315
44.6k
          -0.2546827712406646f,
316
44.6k
      },
317
44.6k
      {
318
44.6k
          0.2500000000000000,
319
44.6k
          -0.1014005039375377f,
320
44.6k
          -0.4444481661973438f,
321
44.6k
          0.3085497062849487f,
322
44.6k
          0.0000000000000000,
323
44.6k
          -0.0643507165794628f,
324
44.6k
          0.1585450355183970f,
325
44.6k
          -0.5112616136592012f,
326
44.6k
          0.2579236279634129f,
327
44.6k
          0.0000000000000000,
328
44.6k
          -0.0812611176717504f,
329
44.6k
          -0.1856718091610990f,
330
44.6k
          -0.3416446842253373f,
331
44.6k
          0.3302282550303805f,
332
44.6k
          0.0702790691196282f,
333
44.6k
          -0.0741750459581023f,
334
44.6k
      },
335
44.6k
      {
336
44.6k
          0.2500000000000000,
337
44.6k
          -0.1014005039375376f,
338
44.6k
          -0.2929100136981264f,
339
44.6k
          0.0000000000000000,
340
44.6k
          0.0000000000000000,
341
44.6k
          -0.0643507165794627f,
342
44.6k
          0.3935103426921022f,
343
44.6k
          0.0657870154914254f,
344
44.6k
          0.0000000000000000,
345
44.6k
          0.4082482904638634f,
346
44.6k
          0.3078822139579031f,
347
44.6k
          0.3852501370925211f,
348
44.6k
          -0.0857401903551927f,
349
44.6k
          -0.4613374887461554f,
350
44.6k
          0.0000000000000000,
351
44.6k
          0.2191868483885728f,
352
44.6k
      },
353
44.6k
      {
354
44.6k
          0.2500000000000000,
355
44.6k
          -0.1014005039375376f,
356
44.6k
          -0.1137907446044814f,
357
44.6k
          -0.1464291867126654f,
358
44.6k
          0.0000000000000000,
359
44.6k
          -0.0643507165794627f,
360
44.6k
          0.0829816309488214f,
361
44.6k
          0.2388977352334547f,
362
44.6k
          -0.3531238544981624f,
363
44.6k
          0.4082482904638630f,
364
44.6k
          -0.4826689115059858f,
365
44.6k
          -0.1741941265991621f,
366
44.6k
          -0.0476868035022928f,
367
44.6k
          0.1253805944856431f,
368
44.6k
          -0.4326608024727457f,
369
44.6k
          -0.2546827712406641f,
370
44.6k
      },
371
44.6k
      {
372
44.6k
          0.2500000000000000,
373
44.6k
          -0.1014005039375374f,
374
44.6k
          0.0000000000000000,
375
44.6k
          0.4251149611657548f,
376
44.6k
          0.0000000000000000,
377
44.6k
          -0.0643507165794626f,
378
44.6k
          -0.4517556589999480f,
379
44.6k
          0.0000000000000000,
380
44.6k
          -0.6035859033230976f,
381
44.6k
          0.0000000000000000,
382
44.6k
          0.0000000000000000,
383
44.6k
          0.0000000000000000,
384
44.6k
          -0.1426608480880724f,
385
44.6k
          -0.1381354035075845f,
386
44.6k
          0.3487520519930227f,
387
44.6k
          0.1135498731499429f,
388
44.6k
      },
389
44.6k
  };
390
391
44.6k
  const HWY_CAPPED(float, 16) d;
392
133k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
89.2k
    auto scalar = Zero(d);
394
1.51M
    for (size_t j = 0; j < 16; j++) {
395
1.42M
      auto px = Set(d, pixels[j]);
396
1.42M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.42M
      scalar = MulAdd(px, basis, scalar);
398
1.42M
    }
399
89.2k
    Store(scalar, d, coeffs + i);
400
89.2k
  }
401
44.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
4.23M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
4.23M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
4.23M
      {
102
4.23M
          0.2500000000000000,
103
4.23M
          0.8769029297991420f,
104
4.23M
          0.0000000000000000,
105
4.23M
          0.0000000000000000,
106
4.23M
          0.0000000000000000,
107
4.23M
          -0.4105377591765233f,
108
4.23M
          0.0000000000000000,
109
4.23M
          0.0000000000000000,
110
4.23M
          0.0000000000000000,
111
4.23M
          0.0000000000000000,
112
4.23M
          0.0000000000000000,
113
4.23M
          0.0000000000000000,
114
4.23M
          0.0000000000000000,
115
4.23M
          0.0000000000000000,
116
4.23M
          0.0000000000000000,
117
4.23M
          0.0000000000000000,
118
4.23M
      },
119
4.23M
      {
120
4.23M
          0.2500000000000000,
121
4.23M
          0.2206518106944235f,
122
4.23M
          0.0000000000000000,
123
4.23M
          0.0000000000000000,
124
4.23M
          -0.7071067811865474f,
125
4.23M
          0.6235485373547691f,
126
4.23M
          0.0000000000000000,
127
4.23M
          0.0000000000000000,
128
4.23M
          0.0000000000000000,
129
4.23M
          0.0000000000000000,
130
4.23M
          0.0000000000000000,
131
4.23M
          0.0000000000000000,
132
4.23M
          0.0000000000000000,
133
4.23M
          0.0000000000000000,
134
4.23M
          0.0000000000000000,
135
4.23M
          0.0000000000000000,
136
4.23M
      },
137
4.23M
      {
138
4.23M
          0.2500000000000000,
139
4.23M
          -0.1014005039375376f,
140
4.23M
          0.4067007583026075f,
141
4.23M
          -0.2125574805828875f,
142
4.23M
          0.0000000000000000,
143
4.23M
          -0.0643507165794627f,
144
4.23M
          -0.4517556589999482f,
145
4.23M
          -0.3046847507248690f,
146
4.23M
          0.3017929516615495f,
147
4.23M
          0.4082482904638627f,
148
4.23M
          0.1747866975480809f,
149
4.23M
          -0.2110560104933578f,
150
4.23M
          -0.1426608480880726f,
151
4.23M
          -0.1381354035075859f,
152
4.23M
          -0.1743760259965107f,
153
4.23M
          0.1135498731499434f,
154
4.23M
      },
155
4.23M
      {
156
4.23M
          0.2500000000000000,
157
4.23M
          -0.1014005039375375f,
158
4.23M
          0.4444481661973445f,
159
4.23M
          0.3085497062849767f,
160
4.23M
          0.0000000000000000f,
161
4.23M
          -0.0643507165794627f,
162
4.23M
          0.1585450355184006f,
163
4.23M
          0.5112616136591823f,
164
4.23M
          0.2579236279634118f,
165
4.23M
          0.0000000000000000,
166
4.23M
          0.0812611176717539f,
167
4.23M
          0.1856718091610980f,
168
4.23M
          -0.3416446842253372f,
169
4.23M
          0.3302282550303788f,
170
4.23M
          0.0702790691196284f,
171
4.23M
          -0.0741750459581035f,
172
4.23M
      },
173
4.23M
      {
174
4.23M
          0.2500000000000000,
175
4.23M
          0.2206518106944236f,
176
4.23M
          0.0000000000000000,
177
4.23M
          0.0000000000000000,
178
4.23M
          0.7071067811865476f,
179
4.23M
          0.6235485373547694f,
180
4.23M
          0.0000000000000000,
181
4.23M
          0.0000000000000000,
182
4.23M
          0.0000000000000000,
183
4.23M
          0.0000000000000000,
184
4.23M
          0.0000000000000000,
185
4.23M
          0.0000000000000000,
186
4.23M
          0.0000000000000000,
187
4.23M
          0.0000000000000000,
188
4.23M
          0.0000000000000000,
189
4.23M
          0.0000000000000000,
190
4.23M
      },
191
4.23M
      {
192
4.23M
          0.2500000000000000,
193
4.23M
          -0.1014005039375378f,
194
4.23M
          0.0000000000000000,
195
4.23M
          0.4706702258572536f,
196
4.23M
          0.0000000000000000,
197
4.23M
          -0.0643507165794628f,
198
4.23M
          -0.0403851516082220f,
199
4.23M
          0.0000000000000000,
200
4.23M
          0.1627234014286620f,
201
4.23M
          0.0000000000000000,
202
4.23M
          0.0000000000000000,
203
4.23M
          0.0000000000000000,
204
4.23M
          0.7367497537172237f,
205
4.23M
          0.0875511500058708f,
206
4.23M
          -0.2921026642334881f,
207
4.23M
          0.1940289303259434f,
208
4.23M
      },
209
4.23M
      {
210
4.23M
          0.2500000000000000,
211
4.23M
          -0.1014005039375377f,
212
4.23M
          0.1957439937204294f,
213
4.23M
          -0.1621205195722993f,
214
4.23M
          0.0000000000000000,
215
4.23M
          -0.0643507165794628f,
216
4.23M
          0.0074182263792424f,
217
4.23M
          -0.2904801297289980f,
218
4.23M
          0.0952002265347504f,
219
4.23M
          0.0000000000000000,
220
4.23M
          -0.3675398009862027f,
221
4.23M
          0.4921585901373873f,
222
4.23M
          0.2462710772207515f,
223
4.23M
          -0.0794670660590957f,
224
4.23M
          0.3623817333531167f,
225
4.23M
          -0.4351904965232280f,
226
4.23M
      },
227
4.23M
      {
228
4.23M
          0.2500000000000000,
229
4.23M
          -0.1014005039375376f,
230
4.23M
          0.2929100136981264f,
231
4.23M
          0.0000000000000000,
232
4.23M
          0.0000000000000000,
233
4.23M
          -0.0643507165794627f,
234
4.23M
          0.3935103426921017f,
235
4.23M
          -0.0657870154914280f,
236
4.23M
          0.0000000000000000,
237
4.23M
          -0.4082482904638628f,
238
4.23M
          -0.3078822139579090f,
239
4.23M
          -0.3852501370925192f,
240
4.23M
          -0.0857401903551931f,
241
4.23M
          -0.4613374887461511f,
242
4.23M
          0.0000000000000000,
243
4.23M
          0.2191868483885747f,
244
4.23M
      },
245
4.23M
      {
246
4.23M
          0.2500000000000000,
247
4.23M
          -0.1014005039375376f,
248
4.23M
          -0.4067007583026072f,
249
4.23M
          -0.2125574805828705f,
250
4.23M
          0.0000000000000000,
251
4.23M
          -0.0643507165794627f,
252
4.23M
          -0.4517556589999464f,
253
4.23M
          0.3046847507248840f,
254
4.23M
          0.3017929516615503f,
255
4.23M
          -0.4082482904638635f,
256
4.23M
          -0.1747866975480813f,
257
4.23M
          0.2110560104933581f,
258
4.23M
          -0.1426608480880734f,
259
4.23M
          -0.1381354035075829f,
260
4.23M
          -0.1743760259965108f,
261
4.23M
          0.1135498731499426f,
262
4.23M
      },
263
4.23M
      {
264
4.23M
          0.2500000000000000,
265
4.23M
          -0.1014005039375377f,
266
4.23M
          -0.1957439937204287f,
267
4.23M
          -0.1621205195722833f,
268
4.23M
          0.0000000000000000,
269
4.23M
          -0.0643507165794628f,
270
4.23M
          0.0074182263792444f,
271
4.23M
          0.2904801297290076f,
272
4.23M
          0.0952002265347505f,
273
4.23M
          0.0000000000000000,
274
4.23M
          0.3675398009862011f,
275
4.23M
          -0.4921585901373891f,
276
4.23M
          0.2462710772207514f,
277
4.23M
          -0.0794670660591026f,
278
4.23M
          0.3623817333531165f,
279
4.23M
          -0.4351904965232251f,
280
4.23M
      },
281
4.23M
      {
282
4.23M
          0.2500000000000000,
283
4.23M
          -0.1014005039375375f,
284
4.23M
          0.0000000000000000,
285
4.23M
          -0.4706702258572528f,
286
4.23M
          0.0000000000000000,
287
4.23M
          -0.0643507165794627f,
288
4.23M
          0.1107416575309343f,
289
4.23M
          0.0000000000000000,
290
4.23M
          -0.1627234014286617f,
291
4.23M
          0.0000000000000000,
292
4.23M
          0.0000000000000000,
293
4.23M
          0.0000000000000000,
294
4.23M
          0.1488339922711357f,
295
4.23M
          0.4972464710953509f,
296
4.23M
          0.2921026642334879f,
297
4.23M
          0.5550443808910661f,
298
4.23M
      },
299
4.23M
      {
300
4.23M
          0.2500000000000000,
301
4.23M
          -0.1014005039375377f,
302
4.23M
          0.1137907446044809f,
303
4.23M
          -0.1464291867126764f,
304
4.23M
          0.0000000000000000,
305
4.23M
          -0.0643507165794628f,
306
4.23M
          0.0829816309488205f,
307
4.23M
          -0.2388977352334460f,
308
4.23M
          -0.3531238544981630f,
309
4.23M
          -0.4082482904638630f,
310
4.23M
          0.4826689115059883f,
311
4.23M
          0.1741941265991622f,
312
4.23M
          -0.0476868035022925f,
313
4.23M
          0.1253805944856366f,
314
4.23M
          -0.4326608024727445f,
315
4.23M
          -0.2546827712406646f,
316
4.23M
      },
317
4.23M
      {
318
4.23M
          0.2500000000000000,
319
4.23M
          -0.1014005039375377f,
320
4.23M
          -0.4444481661973438f,
321
4.23M
          0.3085497062849487f,
322
4.23M
          0.0000000000000000,
323
4.23M
          -0.0643507165794628f,
324
4.23M
          0.1585450355183970f,
325
4.23M
          -0.5112616136592012f,
326
4.23M
          0.2579236279634129f,
327
4.23M
          0.0000000000000000,
328
4.23M
          -0.0812611176717504f,
329
4.23M
          -0.1856718091610990f,
330
4.23M
          -0.3416446842253373f,
331
4.23M
          0.3302282550303805f,
332
4.23M
          0.0702790691196282f,
333
4.23M
          -0.0741750459581023f,
334
4.23M
      },
335
4.23M
      {
336
4.23M
          0.2500000000000000,
337
4.23M
          -0.1014005039375376f,
338
4.23M
          -0.2929100136981264f,
339
4.23M
          0.0000000000000000,
340
4.23M
          0.0000000000000000,
341
4.23M
          -0.0643507165794627f,
342
4.23M
          0.3935103426921022f,
343
4.23M
          0.0657870154914254f,
344
4.23M
          0.0000000000000000,
345
4.23M
          0.4082482904638634f,
346
4.23M
          0.3078822139579031f,
347
4.23M
          0.3852501370925211f,
348
4.23M
          -0.0857401903551927f,
349
4.23M
          -0.4613374887461554f,
350
4.23M
          0.0000000000000000,
351
4.23M
          0.2191868483885728f,
352
4.23M
      },
353
4.23M
      {
354
4.23M
          0.2500000000000000,
355
4.23M
          -0.1014005039375376f,
356
4.23M
          -0.1137907446044814f,
357
4.23M
          -0.1464291867126654f,
358
4.23M
          0.0000000000000000,
359
4.23M
          -0.0643507165794627f,
360
4.23M
          0.0829816309488214f,
361
4.23M
          0.2388977352334547f,
362
4.23M
          -0.3531238544981624f,
363
4.23M
          0.4082482904638630f,
364
4.23M
          -0.4826689115059858f,
365
4.23M
          -0.1741941265991621f,
366
4.23M
          -0.0476868035022928f,
367
4.23M
          0.1253805944856431f,
368
4.23M
          -0.4326608024727457f,
369
4.23M
          -0.2546827712406641f,
370
4.23M
      },
371
4.23M
      {
372
4.23M
          0.2500000000000000,
373
4.23M
          -0.1014005039375374f,
374
4.23M
          0.0000000000000000,
375
4.23M
          0.4251149611657548f,
376
4.23M
          0.0000000000000000,
377
4.23M
          -0.0643507165794626f,
378
4.23M
          -0.4517556589999480f,
379
4.23M
          0.0000000000000000,
380
4.23M
          -0.6035859033230976f,
381
4.23M
          0.0000000000000000,
382
4.23M
          0.0000000000000000,
383
4.23M
          0.0000000000000000,
384
4.23M
          -0.1426608480880724f,
385
4.23M
          -0.1381354035075845f,
386
4.23M
          0.3487520519930227f,
387
4.23M
          0.1135498731499429f,
388
4.23M
      },
389
4.23M
  };
390
391
4.23M
  const HWY_CAPPED(float, 16) d;
392
12.7M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
8.47M
    auto scalar = Zero(d);
394
144M
    for (size_t j = 0; j < 16; j++) {
395
135M
      auto px = Set(d, pixels[j]);
396
135M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
135M
      scalar = MulAdd(px, basis, scalar);
398
135M
    }
399
8.47M
    Store(scalar, d, coeffs + i);
400
8.47M
  }
401
4.23M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
4.32M
                            float* JXL_RESTRICT coefficients) {
411
4.32M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
4.32M
  size_t afv_x = afv_kind & 1;
413
4.32M
  size_t afv_y = afv_kind / 2;
414
4.32M
  HWY_ALIGN float block[4 * 8] = {};
415
21.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
86.5M
    for (size_t ix = 0; ix < 4; ix++) {
417
69.2M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
69.2M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
69.2M
    }
420
17.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
4.32M
  HWY_ALIGN float coeff[4 * 4];
423
4.32M
  AFVDCT4x4(block, coeff);
424
21.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
86.5M
    for (size_t ix = 0; ix < 4; ix++) {
426
69.2M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
69.2M
    }
428
17.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
4.32M
  ComputeScaledDCT<4, 4>()(
431
4.32M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
4.32M
              pixels_stride),
433
4.32M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
21.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
155M
    for (size_t ix = 0; ix < 8; ix++) {
437
138M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
138M
    }
439
17.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
4.32M
  ComputeScaledDCT<4, 8>()(
442
4.32M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
4.32M
      block, scratch_space);
444
21.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
155M
    for (size_t ix = 0; ix < 8; ix++) {
446
138M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
138M
    }
448
17.3M
  }
449
4.32M
  float block00 = coefficients[0] * 0.25f;
450
4.32M
  float block01 = coefficients[1];
451
4.32M
  float block10 = coefficients[8];
452
4.32M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
4.32M
  coefficients[1] = (block00 - block01) * 0.5f;
454
4.32M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
4.32M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.7k
                            float* JXL_RESTRICT coefficients) {
411
11.7k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.7k
  size_t afv_x = afv_kind & 1;
413
11.7k
  size_t afv_y = afv_kind / 2;
414
11.7k
  HWY_ALIGN float block[4 * 8] = {};
415
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
416
235k
    for (size_t ix = 0; ix < 4; ix++) {
417
188k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
188k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
188k
    }
420
47.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
11.7k
  HWY_ALIGN float coeff[4 * 4];
423
11.7k
  AFVDCT4x4(block, coeff);
424
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
425
235k
    for (size_t ix = 0; ix < 4; ix++) {
426
188k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
188k
    }
428
47.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.7k
  ComputeScaledDCT<4, 4>()(
431
11.7k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.7k
              pixels_stride),
433
11.7k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
436
423k
    for (size_t ix = 0; ix < 8; ix++) {
437
376k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
376k
    }
439
47.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
11.7k
  ComputeScaledDCT<4, 8>()(
442
11.7k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.7k
      block, scratch_space);
444
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
445
423k
    for (size_t ix = 0; ix < 8; ix++) {
446
376k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
376k
    }
448
47.0k
  }
449
11.7k
  float block00 = coefficients[0] * 0.25f;
450
11.7k
  float block01 = coefficients[1];
451
11.7k
  float block10 = coefficients[8];
452
11.7k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.7k
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.7k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.09k
                            float* JXL_RESTRICT coefficients) {
411
8.09k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.09k
  size_t afv_x = afv_kind & 1;
413
8.09k
  size_t afv_y = afv_kind / 2;
414
8.09k
  HWY_ALIGN float block[4 * 8] = {};
415
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
416
161k
    for (size_t ix = 0; ix < 4; ix++) {
417
129k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
129k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
129k
    }
420
32.3k
  }
421
  // AFV coefficients in (even, even) positions.
422
8.09k
  HWY_ALIGN float coeff[4 * 4];
423
8.09k
  AFVDCT4x4(block, coeff);
424
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
425
161k
    for (size_t ix = 0; ix < 4; ix++) {
426
129k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
129k
    }
428
32.3k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.09k
  ComputeScaledDCT<4, 4>()(
431
8.09k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.09k
              pixels_stride),
433
8.09k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
436
291k
    for (size_t ix = 0; ix < 8; ix++) {
437
259k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
259k
    }
439
32.3k
  }
440
  // 4x8 DCT of the other half of the block.
441
8.09k
  ComputeScaledDCT<4, 8>()(
442
8.09k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.09k
      block, scratch_space);
444
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
445
291k
    for (size_t ix = 0; ix < 8; ix++) {
446
259k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
259k
    }
448
32.3k
  }
449
8.09k
  float block00 = coefficients[0] * 0.25f;
450
8.09k
  float block01 = coefficients[1];
451
8.09k
  float block10 = coefficients[8];
452
8.09k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.09k
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.09k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.09k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.0k
                            float* JXL_RESTRICT coefficients) {
411
10.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.0k
  size_t afv_x = afv_kind & 1;
413
10.0k
  size_t afv_y = afv_kind / 2;
414
10.0k
  HWY_ALIGN float block[4 * 8] = {};
415
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
201k
    for (size_t ix = 0; ix < 4; ix++) {
417
161k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
161k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
161k
    }
420
40.3k
  }
421
  // AFV coefficients in (even, even) positions.
422
10.0k
  HWY_ALIGN float coeff[4 * 4];
423
10.0k
  AFVDCT4x4(block, coeff);
424
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
201k
    for (size_t ix = 0; ix < 4; ix++) {
426
161k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
161k
    }
428
40.3k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.0k
  ComputeScaledDCT<4, 4>()(
431
10.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.0k
              pixels_stride),
433
10.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
362k
    for (size_t ix = 0; ix < 8; ix++) {
437
322k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
322k
    }
439
40.3k
  }
440
  // 4x8 DCT of the other half of the block.
441
10.0k
  ComputeScaledDCT<4, 8>()(
442
10.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.0k
      block, scratch_space);
444
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
362k
    for (size_t ix = 0; ix < 8; ix++) {
446
322k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
322k
    }
448
40.3k
  }
449
10.0k
  float block00 = coefficients[0] * 0.25f;
450
10.0k
  float block01 = coefficients[1];
451
10.0k
  float block10 = coefficients[8];
452
10.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.0k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6k
                            float* JXL_RESTRICT coefficients) {
411
14.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6k
  size_t afv_x = afv_kind & 1;
413
14.6k
  size_t afv_y = afv_kind / 2;
414
14.6k
  HWY_ALIGN float block[4 * 8] = {};
415
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
293k
    for (size_t ix = 0; ix < 4; ix++) {
417
234k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
234k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
234k
    }
420
58.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6k
  HWY_ALIGN float coeff[4 * 4];
423
14.6k
  AFVDCT4x4(block, coeff);
424
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
293k
    for (size_t ix = 0; ix < 4; ix++) {
426
234k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
234k
    }
428
58.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6k
  ComputeScaledDCT<4, 4>()(
431
14.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6k
              pixels_stride),
433
14.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
527k
    for (size_t ix = 0; ix < 8; ix++) {
437
469k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
469k
    }
439
58.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6k
  ComputeScaledDCT<4, 8>()(
442
14.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6k
      block, scratch_space);
444
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
527k
    for (size_t ix = 0; ix < 8; ix++) {
446
469k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
469k
    }
448
58.6k
  }
449
14.6k
  float block00 = coefficients[0] * 0.25f;
450
14.6k
  float block01 = coefficients[1];
451
14.6k
  float block10 = coefficients[8];
452
14.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.7k
                            float* JXL_RESTRICT coefficients) {
411
11.7k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.7k
  size_t afv_x = afv_kind & 1;
413
11.7k
  size_t afv_y = afv_kind / 2;
414
11.7k
  HWY_ALIGN float block[4 * 8] = {};
415
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
416
235k
    for (size_t ix = 0; ix < 4; ix++) {
417
188k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
188k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
188k
    }
420
47.0k
  }
421
  // AFV coefficients in (even, even) positions.
422
11.7k
  HWY_ALIGN float coeff[4 * 4];
423
11.7k
  AFVDCT4x4(block, coeff);
424
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
425
235k
    for (size_t ix = 0; ix < 4; ix++) {
426
188k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
188k
    }
428
47.0k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.7k
  ComputeScaledDCT<4, 4>()(
431
11.7k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.7k
              pixels_stride),
433
11.7k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
436
423k
    for (size_t ix = 0; ix < 8; ix++) {
437
376k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
376k
    }
439
47.0k
  }
440
  // 4x8 DCT of the other half of the block.
441
11.7k
  ComputeScaledDCT<4, 8>()(
442
11.7k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.7k
      block, scratch_space);
444
58.8k
  for (size_t iy = 0; iy < 4; iy++) {
445
423k
    for (size_t ix = 0; ix < 8; ix++) {
446
376k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
376k
    }
448
47.0k
  }
449
11.7k
  float block00 = coefficients[0] * 0.25f;
450
11.7k
  float block01 = coefficients[1];
451
11.7k
  float block10 = coefficients[8];
452
11.7k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.7k
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.7k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.09k
                            float* JXL_RESTRICT coefficients) {
411
8.09k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.09k
  size_t afv_x = afv_kind & 1;
413
8.09k
  size_t afv_y = afv_kind / 2;
414
8.09k
  HWY_ALIGN float block[4 * 8] = {};
415
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
416
161k
    for (size_t ix = 0; ix < 4; ix++) {
417
129k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
129k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
129k
    }
420
32.3k
  }
421
  // AFV coefficients in (even, even) positions.
422
8.09k
  HWY_ALIGN float coeff[4 * 4];
423
8.09k
  AFVDCT4x4(block, coeff);
424
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
425
161k
    for (size_t ix = 0; ix < 4; ix++) {
426
129k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
129k
    }
428
32.3k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.09k
  ComputeScaledDCT<4, 4>()(
431
8.09k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.09k
              pixels_stride),
433
8.09k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
436
291k
    for (size_t ix = 0; ix < 8; ix++) {
437
259k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
259k
    }
439
32.3k
  }
440
  // 4x8 DCT of the other half of the block.
441
8.09k
  ComputeScaledDCT<4, 8>()(
442
8.09k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.09k
      block, scratch_space);
444
40.4k
  for (size_t iy = 0; iy < 4; iy++) {
445
291k
    for (size_t ix = 0; ix < 8; ix++) {
446
259k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
259k
    }
448
32.3k
  }
449
8.09k
  float block00 = coefficients[0] * 0.25f;
450
8.09k
  float block01 = coefficients[1];
451
8.09k
  float block10 = coefficients[8];
452
8.09k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.09k
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.09k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.09k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.0k
                            float* JXL_RESTRICT coefficients) {
411
10.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.0k
  size_t afv_x = afv_kind & 1;
413
10.0k
  size_t afv_y = afv_kind / 2;
414
10.0k
  HWY_ALIGN float block[4 * 8] = {};
415
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
201k
    for (size_t ix = 0; ix < 4; ix++) {
417
161k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
161k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
161k
    }
420
40.3k
  }
421
  // AFV coefficients in (even, even) positions.
422
10.0k
  HWY_ALIGN float coeff[4 * 4];
423
10.0k
  AFVDCT4x4(block, coeff);
424
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
201k
    for (size_t ix = 0; ix < 4; ix++) {
426
161k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
161k
    }
428
40.3k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.0k
  ComputeScaledDCT<4, 4>()(
431
10.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.0k
              pixels_stride),
433
10.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
362k
    for (size_t ix = 0; ix < 8; ix++) {
437
322k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
322k
    }
439
40.3k
  }
440
  // 4x8 DCT of the other half of the block.
441
10.0k
  ComputeScaledDCT<4, 8>()(
442
10.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.0k
      block, scratch_space);
444
50.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
362k
    for (size_t ix = 0; ix < 8; ix++) {
446
322k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
322k
    }
448
40.3k
  }
449
10.0k
  float block00 = coefficients[0] * 0.25f;
450
10.0k
  float block01 = coefficients[1];
451
10.0k
  float block10 = coefficients[8];
452
10.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.0k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6k
                            float* JXL_RESTRICT coefficients) {
411
14.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6k
  size_t afv_x = afv_kind & 1;
413
14.6k
  size_t afv_y = afv_kind / 2;
414
14.6k
  HWY_ALIGN float block[4 * 8] = {};
415
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
416
293k
    for (size_t ix = 0; ix < 4; ix++) {
417
234k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
234k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
234k
    }
420
58.6k
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6k
  HWY_ALIGN float coeff[4 * 4];
423
14.6k
  AFVDCT4x4(block, coeff);
424
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
425
293k
    for (size_t ix = 0; ix < 4; ix++) {
426
234k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
234k
    }
428
58.6k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6k
  ComputeScaledDCT<4, 4>()(
431
14.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6k
              pixels_stride),
433
14.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
436
527k
    for (size_t ix = 0; ix < 8; ix++) {
437
469k
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
469k
    }
439
58.6k
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6k
  ComputeScaledDCT<4, 8>()(
442
14.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6k
      block, scratch_space);
444
73.3k
  for (size_t iy = 0; iy < 4; iy++) {
445
527k
    for (size_t ix = 0; ix < 8; ix++) {
446
469k
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
469k
    }
448
58.6k
  }
449
14.6k
  float block00 = coefficients[0] * 0.25f;
450
14.6k
  float block01 = coefficients[1];
451
14.6k
  float block10 = coefficients[8];
452
14.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.05M
                            float* JXL_RESTRICT coefficients) {
411
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.05M
  size_t afv_x = afv_kind & 1;
413
1.05M
  size_t afv_y = afv_kind / 2;
414
1.05M
  HWY_ALIGN float block[4 * 8] = {};
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
16.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
16.9M
    }
420
4.23M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.05M
  HWY_ALIGN float coeff[4 * 4];
423
1.05M
  AFVDCT4x4(block, coeff);
424
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
425
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
16.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
16.9M
    }
428
4.23M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.05M
  ComputeScaledDCT<4, 4>()(
431
1.05M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.05M
              pixels_stride),
433
1.05M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
436
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
33.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
33.9M
    }
439
4.23M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.05M
  ComputeScaledDCT<4, 8>()(
442
1.05M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.05M
      block, scratch_space);
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
33.9M
    }
448
4.23M
  }
449
1.05M
  float block00 = coefficients[0] * 0.25f;
450
1.05M
  float block01 = coefficients[1];
451
1.05M
  float block10 = coefficients[8];
452
1.05M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.05M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.05M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.05M
                            float* JXL_RESTRICT coefficients) {
411
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.05M
  size_t afv_x = afv_kind & 1;
413
1.05M
  size_t afv_y = afv_kind / 2;
414
1.05M
  HWY_ALIGN float block[4 * 8] = {};
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
16.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
16.9M
    }
420
4.23M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.05M
  HWY_ALIGN float coeff[4 * 4];
423
1.05M
  AFVDCT4x4(block, coeff);
424
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
425
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
16.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
16.9M
    }
428
4.23M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.05M
  ComputeScaledDCT<4, 4>()(
431
1.05M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.05M
              pixels_stride),
433
1.05M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
436
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
33.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
33.9M
    }
439
4.23M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.05M
  ComputeScaledDCT<4, 8>()(
442
1.05M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.05M
      block, scratch_space);
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
33.9M
    }
448
4.23M
  }
449
1.05M
  float block00 = coefficients[0] * 0.25f;
450
1.05M
  float block01 = coefficients[1];
451
1.05M
  float block10 = coefficients[8];
452
1.05M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.05M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.05M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.05M
                            float* JXL_RESTRICT coefficients) {
411
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.05M
  size_t afv_x = afv_kind & 1;
413
1.05M
  size_t afv_y = afv_kind / 2;
414
1.05M
  HWY_ALIGN float block[4 * 8] = {};
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
16.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
16.9M
    }
420
4.23M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.05M
  HWY_ALIGN float coeff[4 * 4];
423
1.05M
  AFVDCT4x4(block, coeff);
424
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
425
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
16.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
16.9M
    }
428
4.23M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.05M
  ComputeScaledDCT<4, 4>()(
431
1.05M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.05M
              pixels_stride),
433
1.05M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
436
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
33.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
33.9M
    }
439
4.23M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.05M
  ComputeScaledDCT<4, 8>()(
442
1.05M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.05M
      block, scratch_space);
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
33.9M
    }
448
4.23M
  }
449
1.05M
  float block00 = coefficients[0] * 0.25f;
450
1.05M
  float block01 = coefficients[1];
451
1.05M
  float block10 = coefficients[8];
452
1.05M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.05M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.05M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.05M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
1.05M
                            float* JXL_RESTRICT coefficients) {
411
1.05M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
1.05M
  size_t afv_x = afv_kind & 1;
413
1.05M
  size_t afv_y = afv_kind / 2;
414
1.05M
  HWY_ALIGN float block[4 * 8] = {};
415
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
416
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
16.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
16.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
16.9M
    }
420
4.23M
  }
421
  // AFV coefficients in (even, even) positions.
422
1.05M
  HWY_ALIGN float coeff[4 * 4];
423
1.05M
  AFVDCT4x4(block, coeff);
424
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
425
21.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
16.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
16.9M
    }
428
4.23M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
1.05M
  ComputeScaledDCT<4, 4>()(
431
1.05M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
1.05M
              pixels_stride),
433
1.05M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
436
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
33.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
33.9M
    }
439
4.23M
  }
440
  // 4x8 DCT of the other half of the block.
441
1.05M
  ComputeScaledDCT<4, 8>()(
442
1.05M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
1.05M
      block, scratch_space);
444
5.29M
  for (size_t iy = 0; iy < 4; iy++) {
445
38.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
33.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
33.9M
    }
448
4.23M
  }
449
1.05M
  float block00 = coefficients[0] * 0.25f;
450
1.05M
  float block01 = coefficients[1];
451
1.05M
  float block10 = coefficients[8];
452
1.05M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
1.05M
  coefficients[1] = (block00 - block01) * 0.5f;
454
1.05M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
1.05M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
15.3M
                                          float* JXL_RESTRICT scratch_space) {
462
15.3M
  using Type = AcStrategyType;
463
15.3M
  switch (strategy) {
464
1.17M
    case Type::IDENTITY: {
465
3.51M
      for (size_t y = 0; y < 2; y++) {
466
7.02M
        for (size_t x = 0; x < 2; x++) {
467
4.68M
          float block_dc = 0;
468
23.4M
          for (size_t iy = 0; iy < 4; iy++) {
469
93.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
74.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
74.9M
            }
472
18.7M
          }
473
4.68M
          block_dc *= 1.0f / 16;
474
23.4M
          for (size_t iy = 0; iy < 4; iy++) {
475
93.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
74.9M
              if (ix == 1 && iy == 1) continue;
477
70.2M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
70.2M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
70.2M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
70.2M
            }
481
18.7M
          }
482
4.68M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.68M
          coefficients[y * 8 + x] = block_dc;
484
4.68M
        }
485
2.34M
      }
486
1.17M
      float block00 = coefficients[0];
487
1.17M
      float block01 = coefficients[1];
488
1.17M
      float block10 = coefficients[8];
489
1.17M
      float block11 = coefficients[9];
490
1.17M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.17M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.17M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.17M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.17M
      break;
495
0
    }
496
1.12M
    case Type::DCT8X4: {
497
3.38M
      for (size_t x = 0; x < 2; x++) {
498
2.25M
        HWY_ALIGN float block[4 * 8];
499
2.25M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
2.25M
                                 scratch_space);
501
11.2M
        for (size_t iy = 0; iy < 4; iy++) {
502
81.3M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
72.2M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
72.2M
          }
506
9.03M
        }
507
2.25M
      }
508
1.12M
      float block0 = coefficients[0];
509
1.12M
      float block1 = coefficients[8];
510
1.12M
      coefficients[0] = (block0 + block1) * 0.5f;
511
1.12M
      coefficients[8] = (block0 - block1) * 0.5f;
512
1.12M
      break;
513
0
    }
514
1.09M
    case Type::DCT4X8: {
515
3.28M
      for (size_t y = 0; y < 2; y++) {
516
2.19M
        HWY_ALIGN float block[4 * 8];
517
2.19M
        ComputeScaledDCT<4, 8>()(
518
2.19M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
2.19M
            scratch_space);
520
10.9M
        for (size_t iy = 0; iy < 4; iy++) {
521
78.9M
          for (size_t ix = 0; ix < 8; ix++) {
522
70.1M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
70.1M
          }
524
8.77M
        }
525
2.19M
      }
526
1.09M
      float block0 = coefficients[0];
527
1.09M
      float block1 = coefficients[8];
528
1.09M
      coefficients[0] = (block0 + block1) * 0.5f;
529
1.09M
      coefficients[8] = (block0 - block1) * 0.5f;
530
1.09M
      break;
531
0
    }
532
1.05M
    case Type::DCT4X4: {
533
3.17M
      for (size_t y = 0; y < 2; y++) {
534
6.35M
        for (size_t x = 0; x < 2; x++) {
535
4.23M
          HWY_ALIGN float block[4 * 4];
536
4.23M
          ComputeScaledDCT<4, 4>()(
537
4.23M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.23M
              block, scratch_space);
539
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
540
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
541
67.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
67.8M
            }
543
16.9M
          }
544
4.23M
        }
545
2.11M
      }
546
1.05M
      float block00 = coefficients[0];
547
1.05M
      float block01 = coefficients[1];
548
1.05M
      float block10 = coefficients[8];
549
1.05M
      float block11 = coefficients[9];
550
1.05M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.05M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.05M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.05M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.05M
      break;
555
0
    }
556
1.33M
    case Type::DCT2X2: {
557
1.33M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.33M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.33M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.33M
      break;
561
0
    }
562
453k
    case Type::DCT16X16: {
563
453k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
453k
                                 scratch_space);
565
453k
      break;
566
0
    }
567
867k
    case Type::DCT16X8: {
568
867k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
867k
                                scratch_space);
570
867k
      break;
571
0
    }
572
875k
    case Type::DCT8X16: {
573
875k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
875k
                                scratch_space);
575
875k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
177k
    case Type::DCT32X16: {
588
177k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
177k
                                 scratch_space);
590
177k
      break;
591
0
    }
592
181k
    case Type::DCT16X32: {
593
181k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
181k
                                 scratch_space);
595
181k
      break;
596
0
    }
597
117k
    case Type::DCT32X32: {
598
117k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
117k
                                 scratch_space);
600
117k
      break;
601
0
    }
602
2.45M
    case Type::DCT: {
603
2.45M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
2.45M
                               scratch_space);
605
2.45M
      break;
606
0
    }
607
1.08M
    case Type::AFV0: {
608
1.08M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
1.08M
      break;
610
0
    }
611
1.07M
    case Type::AFV1: {
612
1.07M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
1.07M
      break;
614
0
    }
615
1.07M
    case Type::AFV2: {
616
1.07M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
1.07M
      break;
618
0
    }
619
1.08M
    case Type::AFV3: {
620
1.08M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
1.08M
      break;
622
0
    }
623
17.5k
    case Type::DCT64X64: {
624
17.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
17.5k
                                 scratch_space);
626
17.5k
      break;
627
0
    }
628
57.9k
    case Type::DCT64X32: {
629
57.9k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
57.9k
                                 scratch_space);
631
57.9k
      break;
632
0
    }
633
42.2k
    case Type::DCT32X64: {
634
42.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
42.2k
                                 scratch_space);
636
42.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
15.3M
  }
669
15.3M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
545k
                                          float* JXL_RESTRICT scratch_space) {
462
545k
  using Type = AcStrategyType;
463
545k
  switch (strategy) {
464
55.9k
    case Type::IDENTITY: {
465
167k
      for (size_t y = 0; y < 2; y++) {
466
335k
        for (size_t x = 0; x < 2; x++) {
467
223k
          float block_dc = 0;
468
1.11M
          for (size_t iy = 0; iy < 4; iy++) {
469
4.47M
            for (size_t ix = 0; ix < 4; ix++) {
470
3.58M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
3.58M
            }
472
895k
          }
473
223k
          block_dc *= 1.0f / 16;
474
1.11M
          for (size_t iy = 0; iy < 4; iy++) {
475
4.47M
            for (size_t ix = 0; ix < 4; ix++) {
476
3.58M
              if (ix == 1 && iy == 1) continue;
477
3.35M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
3.35M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
3.35M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
3.35M
            }
481
895k
          }
482
223k
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
223k
          coefficients[y * 8 + x] = block_dc;
484
223k
        }
485
111k
      }
486
55.9k
      float block00 = coefficients[0];
487
55.9k
      float block01 = coefficients[1];
488
55.9k
      float block10 = coefficients[8];
489
55.9k
      float block11 = coefficients[9];
490
55.9k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
55.9k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
55.9k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
55.9k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
55.9k
      break;
495
0
    }
496
34.8k
    case Type::DCT8X4: {
497
104k
      for (size_t x = 0; x < 2; x++) {
498
69.7k
        HWY_ALIGN float block[4 * 8];
499
69.7k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
69.7k
                                 scratch_space);
501
348k
        for (size_t iy = 0; iy < 4; iy++) {
502
2.50M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
2.23M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
2.23M
          }
506
278k
        }
507
69.7k
      }
508
34.8k
      float block0 = coefficients[0];
509
34.8k
      float block1 = coefficients[8];
510
34.8k
      coefficients[0] = (block0 + block1) * 0.5f;
511
34.8k
      coefficients[8] = (block0 - block1) * 0.5f;
512
34.8k
      break;
513
0
    }
514
18.3k
    case Type::DCT4X8: {
515
55.1k
      for (size_t y = 0; y < 2; y++) {
516
36.7k
        HWY_ALIGN float block[4 * 8];
517
36.7k
        ComputeScaledDCT<4, 8>()(
518
36.7k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
36.7k
            scratch_space);
520
183k
        for (size_t iy = 0; iy < 4; iy++) {
521
1.32M
          for (size_t ix = 0; ix < 8; ix++) {
522
1.17M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.17M
          }
524
147k
        }
525
36.7k
      }
526
18.3k
      float block0 = coefficients[0];
527
18.3k
      float block1 = coefficients[8];
528
18.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
18.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
18.3k
      break;
531
0
    }
532
0
    case Type::DCT4X4: {
533
0
      for (size_t y = 0; y < 2; y++) {
534
0
        for (size_t x = 0; x < 2; x++) {
535
0
          HWY_ALIGN float block[4 * 4];
536
0
          ComputeScaledDCT<4, 4>()(
537
0
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
0
              block, scratch_space);
539
0
          for (size_t iy = 0; iy < 4; iy++) {
540
0
            for (size_t ix = 0; ix < 4; ix++) {
541
0
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
0
            }
543
0
          }
544
0
        }
545
0
      }
546
0
      float block00 = coefficients[0];
547
0
      float block01 = coefficients[1];
548
0
      float block10 = coefficients[8];
549
0
      float block11 = coefficients[9];
550
0
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
0
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
0
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
0
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
0
      break;
555
0
    }
556
135k
    case Type::DCT2X2: {
557
135k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
135k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
135k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
135k
      break;
561
0
    }
562
16.7k
    case Type::DCT16X16: {
563
16.7k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
16.7k
                                 scratch_space);
565
16.7k
      break;
566
0
    }
567
19.5k
    case Type::DCT16X8: {
568
19.5k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
19.5k
                                scratch_space);
570
19.5k
      break;
571
0
    }
572
22.3k
    case Type::DCT8X16: {
573
22.3k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
22.3k
                                scratch_space);
575
22.3k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
6.44k
    case Type::DCT32X16: {
588
6.44k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
6.44k
                                 scratch_space);
590
6.44k
      break;
591
0
    }
592
7.68k
    case Type::DCT16X32: {
593
7.68k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
7.68k
                                 scratch_space);
595
7.68k
      break;
596
0
    }
597
15.6k
    case Type::DCT32X32: {
598
15.6k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
15.6k
                                 scratch_space);
600
15.6k
      break;
601
0
    }
602
165k
    case Type::DCT: {
603
165k
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
165k
                               scratch_space);
605
165k
      break;
606
0
    }
607
11.7k
    case Type::AFV0: {
608
11.7k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
11.7k
      break;
610
0
    }
611
8.09k
    case Type::AFV1: {
612
8.09k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
8.09k
      break;
614
0
    }
615
10.0k
    case Type::AFV2: {
616
10.0k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
10.0k
      break;
618
0
    }
619
14.6k
    case Type::AFV3: {
620
14.6k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.6k
      break;
622
0
    }
623
1.26k
    case Type::DCT64X64: {
624
1.26k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
1.26k
                                 scratch_space);
626
1.26k
      break;
627
0
    }
628
198
    case Type::DCT64X32: {
629
198
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
198
                                 scratch_space);
631
198
      break;
632
0
    }
633
96
    case Type::DCT32X64: {
634
96
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
96
                                 scratch_space);
636
96
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
545k
  }
669
545k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
1.60M
                                          float* JXL_RESTRICT scratch_space) {
462
1.60M
  using Type = AcStrategyType;
463
1.60M
  switch (strategy) {
464
55.9k
    case Type::IDENTITY: {
465
167k
      for (size_t y = 0; y < 2; y++) {
466
335k
        for (size_t x = 0; x < 2; x++) {
467
223k
          float block_dc = 0;
468
1.11M
          for (size_t iy = 0; iy < 4; iy++) {
469
4.47M
            for (size_t ix = 0; ix < 4; ix++) {
470
3.58M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
3.58M
            }
472
895k
          }
473
223k
          block_dc *= 1.0f / 16;
474
1.11M
          for (size_t iy = 0; iy < 4; iy++) {
475
4.47M
            for (size_t ix = 0; ix < 4; ix++) {
476
3.58M
              if (ix == 1 && iy == 1) continue;
477
3.35M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
3.35M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
3.35M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
3.35M
            }
481
895k
          }
482
223k
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
223k
          coefficients[y * 8 + x] = block_dc;
484
223k
        }
485
111k
      }
486
55.9k
      float block00 = coefficients[0];
487
55.9k
      float block01 = coefficients[1];
488
55.9k
      float block10 = coefficients[8];
489
55.9k
      float block11 = coefficients[9];
490
55.9k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
55.9k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
55.9k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
55.9k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
55.9k
      break;
495
0
    }
496
34.8k
    case Type::DCT8X4: {
497
104k
      for (size_t x = 0; x < 2; x++) {
498
69.7k
        HWY_ALIGN float block[4 * 8];
499
69.7k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
69.7k
                                 scratch_space);
501
348k
        for (size_t iy = 0; iy < 4; iy++) {
502
2.50M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
2.23M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
2.23M
          }
506
278k
        }
507
69.7k
      }
508
34.8k
      float block0 = coefficients[0];
509
34.8k
      float block1 = coefficients[8];
510
34.8k
      coefficients[0] = (block0 + block1) * 0.5f;
511
34.8k
      coefficients[8] = (block0 - block1) * 0.5f;
512
34.8k
      break;
513
0
    }
514
18.3k
    case Type::DCT4X8: {
515
55.1k
      for (size_t y = 0; y < 2; y++) {
516
36.7k
        HWY_ALIGN float block[4 * 8];
517
36.7k
        ComputeScaledDCT<4, 8>()(
518
36.7k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
36.7k
            scratch_space);
520
183k
        for (size_t iy = 0; iy < 4; iy++) {
521
1.32M
          for (size_t ix = 0; ix < 8; ix++) {
522
1.17M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.17M
          }
524
147k
        }
525
36.7k
      }
526
18.3k
      float block0 = coefficients[0];
527
18.3k
      float block1 = coefficients[8];
528
18.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
18.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
18.3k
      break;
531
0
    }
532
0
    case Type::DCT4X4: {
533
0
      for (size_t y = 0; y < 2; y++) {
534
0
        for (size_t x = 0; x < 2; x++) {
535
0
          HWY_ALIGN float block[4 * 4];
536
0
          ComputeScaledDCT<4, 4>()(
537
0
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
0
              block, scratch_space);
539
0
          for (size_t iy = 0; iy < 4; iy++) {
540
0
            for (size_t ix = 0; ix < 4; ix++) {
541
0
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
0
            }
543
0
          }
544
0
        }
545
0
      }
546
0
      float block00 = coefficients[0];
547
0
      float block01 = coefficients[1];
548
0
      float block10 = coefficients[8];
549
0
      float block11 = coefficients[9];
550
0
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
0
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
0
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
0
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
0
      break;
555
0
    }
556
135k
    case Type::DCT2X2: {
557
135k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
135k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
135k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
135k
      break;
561
0
    }
562
16.7k
    case Type::DCT16X16: {
563
16.7k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
16.7k
                                 scratch_space);
565
16.7k
      break;
566
0
    }
567
19.5k
    case Type::DCT16X8: {
568
19.5k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
19.5k
                                scratch_space);
570
19.5k
      break;
571
0
    }
572
22.3k
    case Type::DCT8X16: {
573
22.3k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
22.3k
                                scratch_space);
575
22.3k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
6.44k
    case Type::DCT32X16: {
588
6.44k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
6.44k
                                 scratch_space);
590
6.44k
      break;
591
0
    }
592
7.68k
    case Type::DCT16X32: {
593
7.68k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
7.68k
                                 scratch_space);
595
7.68k
      break;
596
0
    }
597
15.6k
    case Type::DCT32X32: {
598
15.6k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
15.6k
                                 scratch_space);
600
15.6k
      break;
601
0
    }
602
1.22M
    case Type::DCT: {
603
1.22M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.22M
                               scratch_space);
605
1.22M
      break;
606
0
    }
607
11.7k
    case Type::AFV0: {
608
11.7k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
11.7k
      break;
610
0
    }
611
8.09k
    case Type::AFV1: {
612
8.09k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
8.09k
      break;
614
0
    }
615
10.0k
    case Type::AFV2: {
616
10.0k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
10.0k
      break;
618
0
    }
619
14.6k
    case Type::AFV3: {
620
14.6k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.6k
      break;
622
0
    }
623
1.26k
    case Type::DCT64X64: {
624
1.26k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
1.26k
                                 scratch_space);
626
1.26k
      break;
627
0
    }
628
198
    case Type::DCT64X32: {
629
198
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
198
                                 scratch_space);
631
198
      break;
632
0
    }
633
96
    case Type::DCT32X64: {
634
96
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
96
                                 scratch_space);
636
96
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
1.60M
  }
669
1.60M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
13.2M
                                          float* JXL_RESTRICT scratch_space) {
462
13.2M
  using Type = AcStrategyType;
463
13.2M
  switch (strategy) {
464
1.05M
    case Type::IDENTITY: {
465
3.17M
      for (size_t y = 0; y < 2; y++) {
466
6.35M
        for (size_t x = 0; x < 2; x++) {
467
4.23M
          float block_dc = 0;
468
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
469
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
67.8M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
67.8M
            }
472
16.9M
          }
473
4.23M
          block_dc *= 1.0f / 16;
474
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
475
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
67.8M
              if (ix == 1 && iy == 1) continue;
477
63.5M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
63.5M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
63.5M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
63.5M
            }
481
16.9M
          }
482
4.23M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.23M
          coefficients[y * 8 + x] = block_dc;
484
4.23M
        }
485
2.11M
      }
486
1.05M
      float block00 = coefficients[0];
487
1.05M
      float block01 = coefficients[1];
488
1.05M
      float block10 = coefficients[8];
489
1.05M
      float block11 = coefficients[9];
490
1.05M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.05M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.05M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.05M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.05M
      break;
495
0
    }
496
1.05M
    case Type::DCT8X4: {
497
3.17M
      for (size_t x = 0; x < 2; x++) {
498
2.11M
        HWY_ALIGN float block[4 * 8];
499
2.11M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
2.11M
                                 scratch_space);
501
10.5M
        for (size_t iy = 0; iy < 4; iy++) {
502
76.2M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
67.8M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
67.8M
          }
506
8.47M
        }
507
2.11M
      }
508
1.05M
      float block0 = coefficients[0];
509
1.05M
      float block1 = coefficients[8];
510
1.05M
      coefficients[0] = (block0 + block1) * 0.5f;
511
1.05M
      coefficients[8] = (block0 - block1) * 0.5f;
512
1.05M
      break;
513
0
    }
514
1.05M
    case Type::DCT4X8: {
515
3.17M
      for (size_t y = 0; y < 2; y++) {
516
2.11M
        HWY_ALIGN float block[4 * 8];
517
2.11M
        ComputeScaledDCT<4, 8>()(
518
2.11M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
2.11M
            scratch_space);
520
10.5M
        for (size_t iy = 0; iy < 4; iy++) {
521
76.2M
          for (size_t ix = 0; ix < 8; ix++) {
522
67.8M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
67.8M
          }
524
8.47M
        }
525
2.11M
      }
526
1.05M
      float block0 = coefficients[0];
527
1.05M
      float block1 = coefficients[8];
528
1.05M
      coefficients[0] = (block0 + block1) * 0.5f;
529
1.05M
      coefficients[8] = (block0 - block1) * 0.5f;
530
1.05M
      break;
531
0
    }
532
1.05M
    case Type::DCT4X4: {
533
3.17M
      for (size_t y = 0; y < 2; y++) {
534
6.35M
        for (size_t x = 0; x < 2; x++) {
535
4.23M
          HWY_ALIGN float block[4 * 4];
536
4.23M
          ComputeScaledDCT<4, 4>()(
537
4.23M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.23M
              block, scratch_space);
539
21.1M
          for (size_t iy = 0; iy < 4; iy++) {
540
84.7M
            for (size_t ix = 0; ix < 4; ix++) {
541
67.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
67.8M
            }
543
16.9M
          }
544
4.23M
        }
545
2.11M
      }
546
1.05M
      float block00 = coefficients[0];
547
1.05M
      float block01 = coefficients[1];
548
1.05M
      float block10 = coefficients[8];
549
1.05M
      float block11 = coefficients[9];
550
1.05M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.05M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.05M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.05M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.05M
      break;
555
0
    }
556
1.05M
    case Type::DCT2X2: {
557
1.05M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.05M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.05M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.05M
      break;
561
0
    }
562
419k
    case Type::DCT16X16: {
563
419k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
419k
                                 scratch_space);
565
419k
      break;
566
0
    }
567
827k
    case Type::DCT16X8: {
568
827k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
827k
                                scratch_space);
570
827k
      break;
571
0
    }
572
830k
    case Type::DCT8X16: {
573
830k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
830k
                                scratch_space);
575
830k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
164k
    case Type::DCT32X16: {
588
164k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
164k
                                 scratch_space);
590
164k
      break;
591
0
    }
592
166k
    case Type::DCT16X32: {
593
166k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
166k
                                 scratch_space);
595
166k
      break;
596
0
    }
597
86.2k
    case Type::DCT32X32: {
598
86.2k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
86.2k
                                 scratch_space);
600
86.2k
      break;
601
0
    }
602
1.05M
    case Type::DCT: {
603
1.05M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.05M
                               scratch_space);
605
1.05M
      break;
606
0
    }
607
1.05M
    case Type::AFV0: {
608
1.05M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
1.05M
      break;
610
0
    }
611
1.05M
    case Type::AFV1: {
612
1.05M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
1.05M
      break;
614
0
    }
615
1.05M
    case Type::AFV2: {
616
1.05M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
1.05M
      break;
618
0
    }
619
1.05M
    case Type::AFV3: {
620
1.05M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
1.05M
      break;
622
0
    }
623
15.0k
    case Type::DCT64X64: {
624
15.0k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
15.0k
                                 scratch_space);
626
15.0k
      break;
627
0
    }
628
57.5k
    case Type::DCT64X32: {
629
57.5k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
57.5k
                                 scratch_space);
631
57.5k
      break;
632
0
    }
633
42.0k
    case Type::DCT32X64: {
634
42.0k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
42.0k
                                 scratch_space);
636
42.0k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
13.2M
  }
669
13.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
2.14M
                                              float* scratch_space) {
676
2.14M
  using Type = AcStrategyType;
677
2.14M
  switch (strategy) {
678
39.1k
    case Type::DCT16X8: {
679
39.1k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
39.1k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
39.1k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
39.1k
      break;
683
0
    }
684
44.6k
    case Type::DCT8X16: {
685
44.6k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
44.6k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
44.6k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
44.6k
      break;
689
0
    }
690
33.5k
    case Type::DCT16X16: {
691
33.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
33.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
33.5k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
33.5k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
12.8k
    case Type::DCT32X16: {
709
12.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
12.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
12.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
12.8k
      break;
713
0
    }
714
15.3k
    case Type::DCT16X32: {
715
15.3k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
15.3k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
15.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
15.3k
      break;
719
0
    }
720
31.2k
    case Type::DCT32X32: {
721
31.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
31.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
31.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
31.2k
      break;
725
0
    }
726
396
    case Type::DCT64X32: {
727
396
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
396
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
396
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
396
      break;
731
0
    }
732
192
    case Type::DCT32X64: {
733
192
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
192
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
192
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
192
      break;
737
0
    }
738
2.53k
    case Type::DCT64X64: {
739
2.53k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
2.53k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
2.53k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
2.53k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.39M
    case Type::DCT:
787
1.66M
    case Type::DCT2X2:
788
1.66M
    case Type::DCT4X4:
789
1.69M
    case Type::DCT4X8:
790
1.76M
    case Type::DCT8X4:
791
1.79M
    case Type::AFV0:
792
1.80M
    case Type::AFV1:
793
1.82M
    case Type::AFV2:
794
1.85M
    case Type::AFV3:
795
1.96M
    case Type::IDENTITY:
796
1.96M
      dc[0] = block[0];
797
1.96M
      break;
798
2.14M
  }
799
2.14M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
545k
                                              float* scratch_space) {
676
545k
  using Type = AcStrategyType;
677
545k
  switch (strategy) {
678
19.5k
    case Type::DCT16X8: {
679
19.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
19.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
19.5k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
19.5k
      break;
683
0
    }
684
22.3k
    case Type::DCT8X16: {
685
22.3k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
22.3k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
22.3k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
22.3k
      break;
689
0
    }
690
16.7k
    case Type::DCT16X16: {
691
16.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
16.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
16.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
16.7k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
6.44k
    case Type::DCT32X16: {
709
6.44k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
6.44k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
6.44k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
6.44k
      break;
713
0
    }
714
7.68k
    case Type::DCT16X32: {
715
7.68k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
7.68k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
7.68k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
7.68k
      break;
719
0
    }
720
15.6k
    case Type::DCT32X32: {
721
15.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
15.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
15.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
15.6k
      break;
725
0
    }
726
198
    case Type::DCT64X32: {
727
198
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
198
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
198
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
198
      break;
731
0
    }
732
96
    case Type::DCT32X64: {
733
96
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
96
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
96
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
96
      break;
737
0
    }
738
1.26k
    case Type::DCT64X64: {
739
1.26k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
1.26k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
1.26k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
1.26k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
165k
    case Type::DCT:
787
301k
    case Type::DCT2X2:
788
301k
    case Type::DCT4X4:
789
319k
    case Type::DCT4X8:
790
354k
    case Type::DCT8X4:
791
366k
    case Type::AFV0:
792
374k
    case Type::AFV1:
793
384k
    case Type::AFV2:
794
399k
    case Type::AFV3:
795
455k
    case Type::IDENTITY:
796
455k
      dc[0] = block[0];
797
455k
      break;
798
545k
  }
799
545k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
1.60M
                                              float* scratch_space) {
676
1.60M
  using Type = AcStrategyType;
677
1.60M
  switch (strategy) {
678
19.5k
    case Type::DCT16X8: {
679
19.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
19.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
19.5k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
19.5k
      break;
683
0
    }
684
22.3k
    case Type::DCT8X16: {
685
22.3k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
22.3k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
22.3k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
22.3k
      break;
689
0
    }
690
16.7k
    case Type::DCT16X16: {
691
16.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
16.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
16.7k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
16.7k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
6.44k
    case Type::DCT32X16: {
709
6.44k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
6.44k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
6.44k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
6.44k
      break;
713
0
    }
714
7.68k
    case Type::DCT16X32: {
715
7.68k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
7.68k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
7.68k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
7.68k
      break;
719
0
    }
720
15.6k
    case Type::DCT32X32: {
721
15.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
15.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
15.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
15.6k
      break;
725
0
    }
726
198
    case Type::DCT64X32: {
727
198
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
198
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
198
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
198
      break;
731
0
    }
732
96
    case Type::DCT32X64: {
733
96
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
96
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
96
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
96
      break;
737
0
    }
738
1.26k
    case Type::DCT64X64: {
739
1.26k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
1.26k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
1.26k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
1.26k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.22M
    case Type::DCT:
787
1.36M
    case Type::DCT2X2:
788
1.36M
    case Type::DCT4X4:
789
1.37M
    case Type::DCT4X8:
790
1.41M
    case Type::DCT8X4:
791
1.42M
    case Type::AFV0:
792
1.43M
    case Type::AFV1:
793
1.44M
    case Type::AFV2:
794
1.45M
    case Type::AFV3:
795
1.51M
    case Type::IDENTITY:
796
1.51M
      dc[0] = block[0];
797
1.51M
      break;
798
1.60M
  }
799
1.60M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_