Coverage Report

Created: 2026-02-14 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.63M
                                   const size_t output_stride, float* scratch) {
40
1.63M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.63M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.63M
  float* block = scratch;
43
1.63M
  if (ROWS < COLS) {
44
1.21M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
2.60M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.93M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.93M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.93M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.93M
      }
50
669k
    }
51
1.08M
  } else {
52
3.78M
    for (size_t y = 0; y < LF_COLS; y++) {
53
15.0M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
12.3M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
12.3M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
12.3M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
12.3M
      }
58
2.70M
    }
59
1.08M
  }
60
61
1.63M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.63M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.63M
                                  scratch_space);
64
1.63M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
208k
                                   const size_t output_stride, float* scratch) {
40
208k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
208k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
208k
  float* block = scratch;
43
208k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
208k
  } else {
52
417k
    for (size_t y = 0; y < LF_COLS; y++) {
53
626k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
417k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
417k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
417k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
417k
      }
58
208k
    }
59
208k
  }
60
61
208k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
208k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
208k
                                  scratch_space);
64
208k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
224k
                                   const size_t output_stride, float* scratch) {
40
224k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
224k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
224k
  float* block = scratch;
43
224k
  if (ROWS < COLS) {
44
449k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
674k
      for (size_t x = 0; x < LF_COLS; x++) {
46
449k
        block[y * COLS + x] = input[y * input_stride + x] *
47
449k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
449k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
449k
      }
50
224k
    }
51
224k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
224k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
224k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
224k
                                  scratch_space);
64
224k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
147k
                                   const size_t output_stride, float* scratch) {
40
147k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
147k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
147k
  float* block = scratch;
43
147k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
147k
  } else {
52
443k
    for (size_t y = 0; y < LF_COLS; y++) {
53
887k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
591k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
591k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
591k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
591k
      }
58
295k
    }
59
147k
  }
60
61
147k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
147k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
147k
                                  scratch_space);
64
147k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.6k
                                   const size_t output_stride, float* scratch) {
40
45.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.6k
  float* block = scratch;
43
45.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.6k
  } else {
52
136k
    for (size_t y = 0; y < LF_COLS; y++) {
53
456k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
365k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
365k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
365k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
365k
      }
58
91.2k
    }
59
45.6k
  }
60
61
45.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.6k
                                  scratch_space);
64
45.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.5k
                                   const size_t output_stride, float* scratch) {
40
44.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.5k
  float* block = scratch;
43
44.5k
  if (ROWS < COLS) {
44
133k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
445k
      for (size_t x = 0; x < LF_COLS; x++) {
46
356k
        block[y * COLS + x] = input[y * input_stride + x] *
47
356k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
356k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
356k
      }
50
89.0k
    }
51
44.5k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
44.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.5k
                                  scratch_space);
64
44.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
78.6k
                                   const size_t output_stride, float* scratch) {
40
78.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
78.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
78.6k
  float* block = scratch;
43
78.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
78.6k
  } else {
52
393k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.57M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.25M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.25M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.25M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.25M
      }
58
314k
    }
59
78.6k
  }
60
61
78.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
78.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
78.6k
                                  scratch_space);
64
78.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.0k
                                   const size_t output_stride, float* scratch) {
40
12.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.0k
  float* block = scratch;
43
12.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.0k
  } else {
52
60.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
434k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
386k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
386k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
386k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
386k
      }
58
48.2k
    }
59
12.0k
  }
60
61
12.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.0k
                                  scratch_space);
64
12.0k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.11k
                                   const size_t output_stride, float* scratch) {
40
5.11k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.11k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.11k
  float* block = scratch;
43
5.11k
  if (ROWS < COLS) {
44
25.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
184k
      for (size_t x = 0; x < LF_COLS; x++) {
46
163k
        block[y * COLS + x] = input[y * input_stride + x] *
47
163k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
163k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
163k
      }
50
20.4k
    }
51
5.11k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
5.11k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.11k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.11k
                                  scratch_space);
64
5.11k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
49.1k
                                   const size_t output_stride, float* scratch) {
40
49.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
49.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
49.1k
  float* block = scratch;
43
49.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
49.1k
  } else {
52
442k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.54M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.14M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.14M
      }
58
393k
    }
59
49.1k
  }
60
61
49.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
49.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
49.1k
                                  scratch_space);
64
49.1k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
208k
                                   const size_t output_stride, float* scratch) {
40
208k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
208k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
208k
  float* block = scratch;
43
208k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
208k
  } else {
52
417k
    for (size_t y = 0; y < LF_COLS; y++) {
53
626k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
417k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
417k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
417k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
417k
      }
58
208k
    }
59
208k
  }
60
61
208k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
208k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
208k
                                  scratch_space);
64
208k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
224k
                                   const size_t output_stride, float* scratch) {
40
224k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
224k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
224k
  float* block = scratch;
43
224k
  if (ROWS < COLS) {
44
449k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
674k
      for (size_t x = 0; x < LF_COLS; x++) {
46
449k
        block[y * COLS + x] = input[y * input_stride + x] *
47
449k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
449k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
449k
      }
50
224k
    }
51
224k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
224k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
224k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
224k
                                  scratch_space);
64
224k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
147k
                                   const size_t output_stride, float* scratch) {
40
147k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
147k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
147k
  float* block = scratch;
43
147k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
147k
  } else {
52
443k
    for (size_t y = 0; y < LF_COLS; y++) {
53
887k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
591k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
591k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
591k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
591k
      }
58
295k
    }
59
147k
  }
60
61
147k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
147k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
147k
                                  scratch_space);
64
147k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.6k
                                   const size_t output_stride, float* scratch) {
40
45.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.6k
  float* block = scratch;
43
45.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.6k
  } else {
52
136k
    for (size_t y = 0; y < LF_COLS; y++) {
53
456k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
365k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
365k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
365k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
365k
      }
58
91.2k
    }
59
45.6k
  }
60
61
45.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.6k
                                  scratch_space);
64
45.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.5k
                                   const size_t output_stride, float* scratch) {
40
44.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.5k
  float* block = scratch;
43
44.5k
  if (ROWS < COLS) {
44
133k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
445k
      for (size_t x = 0; x < LF_COLS; x++) {
46
356k
        block[y * COLS + x] = input[y * input_stride + x] *
47
356k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
356k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
356k
      }
50
89.0k
    }
51
44.5k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
44.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.5k
                                  scratch_space);
64
44.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
78.6k
                                   const size_t output_stride, float* scratch) {
40
78.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
78.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
78.6k
  float* block = scratch;
43
78.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
78.6k
  } else {
52
393k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.57M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.25M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.25M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.25M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.25M
      }
58
314k
    }
59
78.6k
  }
60
61
78.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
78.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
78.6k
                                  scratch_space);
64
78.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.0k
                                   const size_t output_stride, float* scratch) {
40
12.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.0k
  float* block = scratch;
43
12.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.0k
  } else {
52
60.3k
    for (size_t y = 0; y < LF_COLS; y++) {
53
434k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
386k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
386k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
386k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
386k
      }
58
48.2k
    }
59
12.0k
  }
60
61
12.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.0k
                                  scratch_space);
64
12.0k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.11k
                                   const size_t output_stride, float* scratch) {
40
5.11k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.11k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.11k
  float* block = scratch;
43
5.11k
  if (ROWS < COLS) {
44
25.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
184k
      for (size_t x = 0; x < LF_COLS; x++) {
46
163k
        block[y * COLS + x] = input[y * input_stride + x] *
47
163k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
163k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
163k
      }
50
20.4k
    }
51
5.11k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
5.11k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.11k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.11k
                                  scratch_space);
64
5.11k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
49.1k
                                   const size_t output_stride, float* scratch) {
40
49.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
49.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
49.1k
  float* block = scratch;
43
49.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
49.1k
  } else {
52
442k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.54M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.14M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.14M
      }
58
393k
    }
59
49.1k
  }
60
61
49.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
49.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
49.1k
                                  scratch_space);
64
49.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
45.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
45.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
45.6M
  static_assert(S % 2 == 0, "S should be even");
70
45.6M
  float temp[kDCTBlockSize];
71
45.6M
  constexpr size_t num_2x2 = S / 2;
72
152M
  for (size_t y = 0; y < num_2x2; y++) {
73
425M
    for (size_t x = 0; x < num_2x2; x++) {
74
319M
      float c00 = block[y * 2 * stride + x * 2];
75
319M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
319M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
319M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
319M
      float r00 = c00 + c01 + c10 + c11;
79
319M
      float r01 = c00 + c01 - c10 - c11;
80
319M
      float r10 = c00 - c01 + c10 - c11;
81
319M
      float r11 = c00 - c01 - c10 + c11;
82
319M
      r00 *= 0.25f;
83
319M
      r01 *= 0.25f;
84
319M
      r10 *= 0.25f;
85
319M
      r11 *= 0.25f;
86
319M
      temp[y * kBlockDim + x] = r00;
87
319M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
319M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
319M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
319M
    }
91
106M
  }
92
258M
  for (size_t y = 0; y < S; y++) {
93
1.49G
    for (size_t x = 0; x < S; x++) {
94
1.27G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.27G
    }
96
212M
  }
97
45.6M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
9.28M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
29.6M
      float c00 = block[y * 2 * stride + x * 2];
75
29.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
29.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
29.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
29.6M
      float r00 = c00 + c01 + c10 + c11;
79
29.6M
      float r01 = c00 + c01 - c10 - c11;
80
29.6M
      float r10 = c00 - c01 + c10 - c11;
81
29.6M
      float r11 = c00 - c01 - c10 + c11;
82
29.6M
      r00 *= 0.25f;
83
29.6M
      r01 *= 0.25f;
84
29.6M
      r10 *= 0.25f;
85
29.6M
      r11 *= 0.25f;
86
29.6M
      temp[y * kBlockDim + x] = r00;
87
29.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
29.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
29.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
29.6M
    }
91
7.42M
  }
92
16.7M
  for (size_t y = 0; y < S; y++) {
93
133M
    for (size_t x = 0; x < S; x++) {
94
118M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
118M
    }
96
14.8M
  }
97
1.85M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
5.56M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
7.42M
      float c00 = block[y * 2 * stride + x * 2];
75
7.42M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
7.42M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
7.42M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
7.42M
      float r00 = c00 + c01 + c10 + c11;
79
7.42M
      float r01 = c00 + c01 - c10 - c11;
80
7.42M
      float r10 = c00 - c01 + c10 - c11;
81
7.42M
      float r11 = c00 - c01 - c10 + c11;
82
7.42M
      r00 *= 0.25f;
83
7.42M
      r01 *= 0.25f;
84
7.42M
      r10 *= 0.25f;
85
7.42M
      r11 *= 0.25f;
86
7.42M
      temp[y * kBlockDim + x] = r00;
87
7.42M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
7.42M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
7.42M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
7.42M
    }
91
3.71M
  }
92
9.28M
  for (size_t y = 0; y < S; y++) {
93
37.1M
    for (size_t x = 0; x < S; x++) {
94
29.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
29.6M
    }
96
7.42M
  }
97
1.85M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
3.71M
  for (size_t y = 0; y < num_2x2; y++) {
73
3.71M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.85M
      float c00 = block[y * 2 * stride + x * 2];
75
1.85M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.85M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.85M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.85M
      float r00 = c00 + c01 + c10 + c11;
79
1.85M
      float r01 = c00 + c01 - c10 - c11;
80
1.85M
      float r10 = c00 - c01 + c10 - c11;
81
1.85M
      float r11 = c00 - c01 - c10 + c11;
82
1.85M
      r00 *= 0.25f;
83
1.85M
      r01 *= 0.25f;
84
1.85M
      r10 *= 0.25f;
85
1.85M
      r11 *= 0.25f;
86
1.85M
      temp[y * kBlockDim + x] = r00;
87
1.85M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.85M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.85M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.85M
    }
91
1.85M
  }
92
5.56M
  for (size_t y = 0; y < S; y++) {
93
11.1M
    for (size_t x = 0; x < S; x++) {
94
7.42M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
7.42M
    }
96
3.71M
  }
97
1.85M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
9.28M
  for (size_t y = 0; y < num_2x2; y++) {
73
37.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
29.6M
      float c00 = block[y * 2 * stride + x * 2];
75
29.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
29.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
29.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
29.6M
      float r00 = c00 + c01 + c10 + c11;
79
29.6M
      float r01 = c00 + c01 - c10 - c11;
80
29.6M
      float r10 = c00 - c01 + c10 - c11;
81
29.6M
      float r11 = c00 - c01 - c10 + c11;
82
29.6M
      r00 *= 0.25f;
83
29.6M
      r01 *= 0.25f;
84
29.6M
      r10 *= 0.25f;
85
29.6M
      r11 *= 0.25f;
86
29.6M
      temp[y * kBlockDim + x] = r00;
87
29.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
29.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
29.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
29.6M
    }
91
7.42M
  }
92
16.7M
  for (size_t y = 0; y < S; y++) {
93
133M
    for (size_t x = 0; x < S; x++) {
94
118M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
118M
    }
96
14.8M
  }
97
1.85M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
5.56M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
7.42M
      float c00 = block[y * 2 * stride + x * 2];
75
7.42M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
7.42M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
7.42M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
7.42M
      float r00 = c00 + c01 + c10 + c11;
79
7.42M
      float r01 = c00 + c01 - c10 - c11;
80
7.42M
      float r10 = c00 - c01 + c10 - c11;
81
7.42M
      float r11 = c00 - c01 - c10 + c11;
82
7.42M
      r00 *= 0.25f;
83
7.42M
      r01 *= 0.25f;
84
7.42M
      r10 *= 0.25f;
85
7.42M
      r11 *= 0.25f;
86
7.42M
      temp[y * kBlockDim + x] = r00;
87
7.42M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
7.42M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
7.42M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
7.42M
    }
91
3.71M
  }
92
9.28M
  for (size_t y = 0; y < S; y++) {
93
37.1M
    for (size_t x = 0; x < S; x++) {
94
29.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
29.6M
    }
96
7.42M
  }
97
1.85M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.85M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.85M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.85M
  static_assert(S % 2 == 0, "S should be even");
70
1.85M
  float temp[kDCTBlockSize];
71
1.85M
  constexpr size_t num_2x2 = S / 2;
72
3.71M
  for (size_t y = 0; y < num_2x2; y++) {
73
3.71M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.85M
      float c00 = block[y * 2 * stride + x * 2];
75
1.85M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.85M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.85M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.85M
      float r00 = c00 + c01 + c10 + c11;
79
1.85M
      float r01 = c00 + c01 - c10 - c11;
80
1.85M
      float r10 = c00 - c01 + c10 - c11;
81
1.85M
      float r11 = c00 - c01 - c10 + c11;
82
1.85M
      r00 *= 0.25f;
83
1.85M
      r01 *= 0.25f;
84
1.85M
      r10 *= 0.25f;
85
1.85M
      r11 *= 0.25f;
86
1.85M
      temp[y * kBlockDim + x] = r00;
87
1.85M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.85M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.85M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.85M
    }
91
1.85M
  }
92
5.56M
  for (size_t y = 0; y < S; y++) {
93
11.1M
    for (size_t x = 0; x < S; x++) {
94
7.42M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
7.42M
    }
96
3.71M
  }
97
1.85M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
57.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
229M
    for (size_t x = 0; x < num_2x2; x++) {
74
183M
      float c00 = block[y * 2 * stride + x * 2];
75
183M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
183M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
183M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
183M
      float r00 = c00 + c01 + c10 + c11;
79
183M
      float r01 = c00 + c01 - c10 - c11;
80
183M
      float r10 = c00 - c01 + c10 - c11;
81
183M
      float r11 = c00 - c01 - c10 + c11;
82
183M
      r00 *= 0.25f;
83
183M
      r01 *= 0.25f;
84
183M
      r10 *= 0.25f;
85
183M
      r11 *= 0.25f;
86
183M
      temp[y * kBlockDim + x] = r00;
87
183M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
183M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
183M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
183M
    }
91
45.9M
  }
92
103M
  for (size_t y = 0; y < S; y++) {
93
827M
    for (size_t x = 0; x < S; x++) {
94
735M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
735M
    }
96
91.9M
  }
97
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
34.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
68.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
45.9M
      float c00 = block[y * 2 * stride + x * 2];
75
45.9M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
45.9M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
45.9M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
45.9M
      float r00 = c00 + c01 + c10 + c11;
79
45.9M
      float r01 = c00 + c01 - c10 - c11;
80
45.9M
      float r10 = c00 - c01 + c10 - c11;
81
45.9M
      float r11 = c00 - c01 - c10 + c11;
82
45.9M
      r00 *= 0.25f;
83
45.9M
      r01 *= 0.25f;
84
45.9M
      r10 *= 0.25f;
85
45.9M
      r11 *= 0.25f;
86
45.9M
      temp[y * kBlockDim + x] = r00;
87
45.9M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
45.9M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
45.9M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
45.9M
    }
91
22.9M
  }
92
57.4M
  for (size_t y = 0; y < S; y++) {
93
229M
    for (size_t x = 0; x < S; x++) {
94
183M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
183M
    }
96
45.9M
  }
97
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
11.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
11.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
11.4M
  static_assert(S % 2 == 0, "S should be even");
70
11.4M
  float temp[kDCTBlockSize];
71
11.4M
  constexpr size_t num_2x2 = S / 2;
72
22.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
11.4M
      float c00 = block[y * 2 * stride + x * 2];
75
11.4M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
11.4M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
11.4M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
11.4M
      float r00 = c00 + c01 + c10 + c11;
79
11.4M
      float r01 = c00 + c01 - c10 - c11;
80
11.4M
      float r10 = c00 - c01 + c10 - c11;
81
11.4M
      float r11 = c00 - c01 - c10 + c11;
82
11.4M
      r00 *= 0.25f;
83
11.4M
      r01 *= 0.25f;
84
11.4M
      r10 *= 0.25f;
85
11.4M
      r11 *= 0.25f;
86
11.4M
      temp[y * kBlockDim + x] = r00;
87
11.4M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
11.4M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
11.4M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
11.4M
    }
91
11.4M
  }
92
34.4M
  for (size_t y = 0; y < S; y++) {
93
68.9M
    for (size_t x = 0; x < S; x++) {
94
45.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
45.9M
    }
96
22.9M
  }
97
11.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
46.5M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
46.5M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
46.5M
      {
102
46.5M
          0.2500000000000000,
103
46.5M
          0.8769029297991420f,
104
46.5M
          0.0000000000000000,
105
46.5M
          0.0000000000000000,
106
46.5M
          0.0000000000000000,
107
46.5M
          -0.4105377591765233f,
108
46.5M
          0.0000000000000000,
109
46.5M
          0.0000000000000000,
110
46.5M
          0.0000000000000000,
111
46.5M
          0.0000000000000000,
112
46.5M
          0.0000000000000000,
113
46.5M
          0.0000000000000000,
114
46.5M
          0.0000000000000000,
115
46.5M
          0.0000000000000000,
116
46.5M
          0.0000000000000000,
117
46.5M
          0.0000000000000000,
118
46.5M
      },
119
46.5M
      {
120
46.5M
          0.2500000000000000,
121
46.5M
          0.2206518106944235f,
122
46.5M
          0.0000000000000000,
123
46.5M
          0.0000000000000000,
124
46.5M
          -0.7071067811865474f,
125
46.5M
          0.6235485373547691f,
126
46.5M
          0.0000000000000000,
127
46.5M
          0.0000000000000000,
128
46.5M
          0.0000000000000000,
129
46.5M
          0.0000000000000000,
130
46.5M
          0.0000000000000000,
131
46.5M
          0.0000000000000000,
132
46.5M
          0.0000000000000000,
133
46.5M
          0.0000000000000000,
134
46.5M
          0.0000000000000000,
135
46.5M
          0.0000000000000000,
136
46.5M
      },
137
46.5M
      {
138
46.5M
          0.2500000000000000,
139
46.5M
          -0.1014005039375376f,
140
46.5M
          0.4067007583026075f,
141
46.5M
          -0.2125574805828875f,
142
46.5M
          0.0000000000000000,
143
46.5M
          -0.0643507165794627f,
144
46.5M
          -0.4517556589999482f,
145
46.5M
          -0.3046847507248690f,
146
46.5M
          0.3017929516615495f,
147
46.5M
          0.4082482904638627f,
148
46.5M
          0.1747866975480809f,
149
46.5M
          -0.2110560104933578f,
150
46.5M
          -0.1426608480880726f,
151
46.5M
          -0.1381354035075859f,
152
46.5M
          -0.1743760259965107f,
153
46.5M
          0.1135498731499434f,
154
46.5M
      },
155
46.5M
      {
156
46.5M
          0.2500000000000000,
157
46.5M
          -0.1014005039375375f,
158
46.5M
          0.4444481661973445f,
159
46.5M
          0.3085497062849767f,
160
46.5M
          0.0000000000000000f,
161
46.5M
          -0.0643507165794627f,
162
46.5M
          0.1585450355184006f,
163
46.5M
          0.5112616136591823f,
164
46.5M
          0.2579236279634118f,
165
46.5M
          0.0000000000000000,
166
46.5M
          0.0812611176717539f,
167
46.5M
          0.1856718091610980f,
168
46.5M
          -0.3416446842253372f,
169
46.5M
          0.3302282550303788f,
170
46.5M
          0.0702790691196284f,
171
46.5M
          -0.0741750459581035f,
172
46.5M
      },
173
46.5M
      {
174
46.5M
          0.2500000000000000,
175
46.5M
          0.2206518106944236f,
176
46.5M
          0.0000000000000000,
177
46.5M
          0.0000000000000000,
178
46.5M
          0.7071067811865476f,
179
46.5M
          0.6235485373547694f,
180
46.5M
          0.0000000000000000,
181
46.5M
          0.0000000000000000,
182
46.5M
          0.0000000000000000,
183
46.5M
          0.0000000000000000,
184
46.5M
          0.0000000000000000,
185
46.5M
          0.0000000000000000,
186
46.5M
          0.0000000000000000,
187
46.5M
          0.0000000000000000,
188
46.5M
          0.0000000000000000,
189
46.5M
          0.0000000000000000,
190
46.5M
      },
191
46.5M
      {
192
46.5M
          0.2500000000000000,
193
46.5M
          -0.1014005039375378f,
194
46.5M
          0.0000000000000000,
195
46.5M
          0.4706702258572536f,
196
46.5M
          0.0000000000000000,
197
46.5M
          -0.0643507165794628f,
198
46.5M
          -0.0403851516082220f,
199
46.5M
          0.0000000000000000,
200
46.5M
          0.1627234014286620f,
201
46.5M
          0.0000000000000000,
202
46.5M
          0.0000000000000000,
203
46.5M
          0.0000000000000000,
204
46.5M
          0.7367497537172237f,
205
46.5M
          0.0875511500058708f,
206
46.5M
          -0.2921026642334881f,
207
46.5M
          0.1940289303259434f,
208
46.5M
      },
209
46.5M
      {
210
46.5M
          0.2500000000000000,
211
46.5M
          -0.1014005039375377f,
212
46.5M
          0.1957439937204294f,
213
46.5M
          -0.1621205195722993f,
214
46.5M
          0.0000000000000000,
215
46.5M
          -0.0643507165794628f,
216
46.5M
          0.0074182263792424f,
217
46.5M
          -0.2904801297289980f,
218
46.5M
          0.0952002265347504f,
219
46.5M
          0.0000000000000000,
220
46.5M
          -0.3675398009862027f,
221
46.5M
          0.4921585901373873f,
222
46.5M
          0.2462710772207515f,
223
46.5M
          -0.0794670660590957f,
224
46.5M
          0.3623817333531167f,
225
46.5M
          -0.4351904965232280f,
226
46.5M
      },
227
46.5M
      {
228
46.5M
          0.2500000000000000,
229
46.5M
          -0.1014005039375376f,
230
46.5M
          0.2929100136981264f,
231
46.5M
          0.0000000000000000,
232
46.5M
          0.0000000000000000,
233
46.5M
          -0.0643507165794627f,
234
46.5M
          0.3935103426921017f,
235
46.5M
          -0.0657870154914280f,
236
46.5M
          0.0000000000000000,
237
46.5M
          -0.4082482904638628f,
238
46.5M
          -0.3078822139579090f,
239
46.5M
          -0.3852501370925192f,
240
46.5M
          -0.0857401903551931f,
241
46.5M
          -0.4613374887461511f,
242
46.5M
          0.0000000000000000,
243
46.5M
          0.2191868483885747f,
244
46.5M
      },
245
46.5M
      {
246
46.5M
          0.2500000000000000,
247
46.5M
          -0.1014005039375376f,
248
46.5M
          -0.4067007583026072f,
249
46.5M
          -0.2125574805828705f,
250
46.5M
          0.0000000000000000,
251
46.5M
          -0.0643507165794627f,
252
46.5M
          -0.4517556589999464f,
253
46.5M
          0.3046847507248840f,
254
46.5M
          0.3017929516615503f,
255
46.5M
          -0.4082482904638635f,
256
46.5M
          -0.1747866975480813f,
257
46.5M
          0.2110560104933581f,
258
46.5M
          -0.1426608480880734f,
259
46.5M
          -0.1381354035075829f,
260
46.5M
          -0.1743760259965108f,
261
46.5M
          0.1135498731499426f,
262
46.5M
      },
263
46.5M
      {
264
46.5M
          0.2500000000000000,
265
46.5M
          -0.1014005039375377f,
266
46.5M
          -0.1957439937204287f,
267
46.5M
          -0.1621205195722833f,
268
46.5M
          0.0000000000000000,
269
46.5M
          -0.0643507165794628f,
270
46.5M
          0.0074182263792444f,
271
46.5M
          0.2904801297290076f,
272
46.5M
          0.0952002265347505f,
273
46.5M
          0.0000000000000000,
274
46.5M
          0.3675398009862011f,
275
46.5M
          -0.4921585901373891f,
276
46.5M
          0.2462710772207514f,
277
46.5M
          -0.0794670660591026f,
278
46.5M
          0.3623817333531165f,
279
46.5M
          -0.4351904965232251f,
280
46.5M
      },
281
46.5M
      {
282
46.5M
          0.2500000000000000,
283
46.5M
          -0.1014005039375375f,
284
46.5M
          0.0000000000000000,
285
46.5M
          -0.4706702258572528f,
286
46.5M
          0.0000000000000000,
287
46.5M
          -0.0643507165794627f,
288
46.5M
          0.1107416575309343f,
289
46.5M
          0.0000000000000000,
290
46.5M
          -0.1627234014286617f,
291
46.5M
          0.0000000000000000,
292
46.5M
          0.0000000000000000,
293
46.5M
          0.0000000000000000,
294
46.5M
          0.1488339922711357f,
295
46.5M
          0.4972464710953509f,
296
46.5M
          0.2921026642334879f,
297
46.5M
          0.5550443808910661f,
298
46.5M
      },
299
46.5M
      {
300
46.5M
          0.2500000000000000,
301
46.5M
          -0.1014005039375377f,
302
46.5M
          0.1137907446044809f,
303
46.5M
          -0.1464291867126764f,
304
46.5M
          0.0000000000000000,
305
46.5M
          -0.0643507165794628f,
306
46.5M
          0.0829816309488205f,
307
46.5M
          -0.2388977352334460f,
308
46.5M
          -0.3531238544981630f,
309
46.5M
          -0.4082482904638630f,
310
46.5M
          0.4826689115059883f,
311
46.5M
          0.1741941265991622f,
312
46.5M
          -0.0476868035022925f,
313
46.5M
          0.1253805944856366f,
314
46.5M
          -0.4326608024727445f,
315
46.5M
          -0.2546827712406646f,
316
46.5M
      },
317
46.5M
      {
318
46.5M
          0.2500000000000000,
319
46.5M
          -0.1014005039375377f,
320
46.5M
          -0.4444481661973438f,
321
46.5M
          0.3085497062849487f,
322
46.5M
          0.0000000000000000,
323
46.5M
          -0.0643507165794628f,
324
46.5M
          0.1585450355183970f,
325
46.5M
          -0.5112616136592012f,
326
46.5M
          0.2579236279634129f,
327
46.5M
          0.0000000000000000,
328
46.5M
          -0.0812611176717504f,
329
46.5M
          -0.1856718091610990f,
330
46.5M
          -0.3416446842253373f,
331
46.5M
          0.3302282550303805f,
332
46.5M
          0.0702790691196282f,
333
46.5M
          -0.0741750459581023f,
334
46.5M
      },
335
46.5M
      {
336
46.5M
          0.2500000000000000,
337
46.5M
          -0.1014005039375376f,
338
46.5M
          -0.2929100136981264f,
339
46.5M
          0.0000000000000000,
340
46.5M
          0.0000000000000000,
341
46.5M
          -0.0643507165794627f,
342
46.5M
          0.3935103426921022f,
343
46.5M
          0.0657870154914254f,
344
46.5M
          0.0000000000000000,
345
46.5M
          0.4082482904638634f,
346
46.5M
          0.3078822139579031f,
347
46.5M
          0.3852501370925211f,
348
46.5M
          -0.0857401903551927f,
349
46.5M
          -0.4613374887461554f,
350
46.5M
          0.0000000000000000,
351
46.5M
          0.2191868483885728f,
352
46.5M
      },
353
46.5M
      {
354
46.5M
          0.2500000000000000,
355
46.5M
          -0.1014005039375376f,
356
46.5M
          -0.1137907446044814f,
357
46.5M
          -0.1464291867126654f,
358
46.5M
          0.0000000000000000,
359
46.5M
          -0.0643507165794627f,
360
46.5M
          0.0829816309488214f,
361
46.5M
          0.2388977352334547f,
362
46.5M
          -0.3531238544981624f,
363
46.5M
          0.4082482904638630f,
364
46.5M
          -0.4826689115059858f,
365
46.5M
          -0.1741941265991621f,
366
46.5M
          -0.0476868035022928f,
367
46.5M
          0.1253805944856431f,
368
46.5M
          -0.4326608024727457f,
369
46.5M
          -0.2546827712406641f,
370
46.5M
      },
371
46.5M
      {
372
46.5M
          0.2500000000000000,
373
46.5M
          -0.1014005039375374f,
374
46.5M
          0.0000000000000000,
375
46.5M
          0.4251149611657548f,
376
46.5M
          0.0000000000000000,
377
46.5M
          -0.0643507165794626f,
378
46.5M
          -0.4517556589999480f,
379
46.5M
          0.0000000000000000,
380
46.5M
          -0.6035859033230976f,
381
46.5M
          0.0000000000000000,
382
46.5M
          0.0000000000000000,
383
46.5M
          0.0000000000000000,
384
46.5M
          -0.1426608480880724f,
385
46.5M
          -0.1381354035075845f,
386
46.5M
          0.3487520519930227f,
387
46.5M
          0.1135498731499429f,
388
46.5M
      },
389
46.5M
  };
390
391
46.5M
  const HWY_CAPPED(float, 16) d;
392
139M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
93.0M
    auto scalar = Zero(d);
394
1.58G
    for (size_t j = 0; j < 16; j++) {
395
1.48G
      auto px = Set(d, pixels[j]);
396
1.48G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.48G
      scalar = MulAdd(px, basis, scalar);
398
1.48G
    }
399
93.0M
    Store(scalar, d, coeffs + i);
400
93.0M
  }
401
46.5M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
278k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
278k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
278k
      {
102
278k
          0.2500000000000000,
103
278k
          0.8769029297991420f,
104
278k
          0.0000000000000000,
105
278k
          0.0000000000000000,
106
278k
          0.0000000000000000,
107
278k
          -0.4105377591765233f,
108
278k
          0.0000000000000000,
109
278k
          0.0000000000000000,
110
278k
          0.0000000000000000,
111
278k
          0.0000000000000000,
112
278k
          0.0000000000000000,
113
278k
          0.0000000000000000,
114
278k
          0.0000000000000000,
115
278k
          0.0000000000000000,
116
278k
          0.0000000000000000,
117
278k
          0.0000000000000000,
118
278k
      },
119
278k
      {
120
278k
          0.2500000000000000,
121
278k
          0.2206518106944235f,
122
278k
          0.0000000000000000,
123
278k
          0.0000000000000000,
124
278k
          -0.7071067811865474f,
125
278k
          0.6235485373547691f,
126
278k
          0.0000000000000000,
127
278k
          0.0000000000000000,
128
278k
          0.0000000000000000,
129
278k
          0.0000000000000000,
130
278k
          0.0000000000000000,
131
278k
          0.0000000000000000,
132
278k
          0.0000000000000000,
133
278k
          0.0000000000000000,
134
278k
          0.0000000000000000,
135
278k
          0.0000000000000000,
136
278k
      },
137
278k
      {
138
278k
          0.2500000000000000,
139
278k
          -0.1014005039375376f,
140
278k
          0.4067007583026075f,
141
278k
          -0.2125574805828875f,
142
278k
          0.0000000000000000,
143
278k
          -0.0643507165794627f,
144
278k
          -0.4517556589999482f,
145
278k
          -0.3046847507248690f,
146
278k
          0.3017929516615495f,
147
278k
          0.4082482904638627f,
148
278k
          0.1747866975480809f,
149
278k
          -0.2110560104933578f,
150
278k
          -0.1426608480880726f,
151
278k
          -0.1381354035075859f,
152
278k
          -0.1743760259965107f,
153
278k
          0.1135498731499434f,
154
278k
      },
155
278k
      {
156
278k
          0.2500000000000000,
157
278k
          -0.1014005039375375f,
158
278k
          0.4444481661973445f,
159
278k
          0.3085497062849767f,
160
278k
          0.0000000000000000f,
161
278k
          -0.0643507165794627f,
162
278k
          0.1585450355184006f,
163
278k
          0.5112616136591823f,
164
278k
          0.2579236279634118f,
165
278k
          0.0000000000000000,
166
278k
          0.0812611176717539f,
167
278k
          0.1856718091610980f,
168
278k
          -0.3416446842253372f,
169
278k
          0.3302282550303788f,
170
278k
          0.0702790691196284f,
171
278k
          -0.0741750459581035f,
172
278k
      },
173
278k
      {
174
278k
          0.2500000000000000,
175
278k
          0.2206518106944236f,
176
278k
          0.0000000000000000,
177
278k
          0.0000000000000000,
178
278k
          0.7071067811865476f,
179
278k
          0.6235485373547694f,
180
278k
          0.0000000000000000,
181
278k
          0.0000000000000000,
182
278k
          0.0000000000000000,
183
278k
          0.0000000000000000,
184
278k
          0.0000000000000000,
185
278k
          0.0000000000000000,
186
278k
          0.0000000000000000,
187
278k
          0.0000000000000000,
188
278k
          0.0000000000000000,
189
278k
          0.0000000000000000,
190
278k
      },
191
278k
      {
192
278k
          0.2500000000000000,
193
278k
          -0.1014005039375378f,
194
278k
          0.0000000000000000,
195
278k
          0.4706702258572536f,
196
278k
          0.0000000000000000,
197
278k
          -0.0643507165794628f,
198
278k
          -0.0403851516082220f,
199
278k
          0.0000000000000000,
200
278k
          0.1627234014286620f,
201
278k
          0.0000000000000000,
202
278k
          0.0000000000000000,
203
278k
          0.0000000000000000,
204
278k
          0.7367497537172237f,
205
278k
          0.0875511500058708f,
206
278k
          -0.2921026642334881f,
207
278k
          0.1940289303259434f,
208
278k
      },
209
278k
      {
210
278k
          0.2500000000000000,
211
278k
          -0.1014005039375377f,
212
278k
          0.1957439937204294f,
213
278k
          -0.1621205195722993f,
214
278k
          0.0000000000000000,
215
278k
          -0.0643507165794628f,
216
278k
          0.0074182263792424f,
217
278k
          -0.2904801297289980f,
218
278k
          0.0952002265347504f,
219
278k
          0.0000000000000000,
220
278k
          -0.3675398009862027f,
221
278k
          0.4921585901373873f,
222
278k
          0.2462710772207515f,
223
278k
          -0.0794670660590957f,
224
278k
          0.3623817333531167f,
225
278k
          -0.4351904965232280f,
226
278k
      },
227
278k
      {
228
278k
          0.2500000000000000,
229
278k
          -0.1014005039375376f,
230
278k
          0.2929100136981264f,
231
278k
          0.0000000000000000,
232
278k
          0.0000000000000000,
233
278k
          -0.0643507165794627f,
234
278k
          0.3935103426921017f,
235
278k
          -0.0657870154914280f,
236
278k
          0.0000000000000000,
237
278k
          -0.4082482904638628f,
238
278k
          -0.3078822139579090f,
239
278k
          -0.3852501370925192f,
240
278k
          -0.0857401903551931f,
241
278k
          -0.4613374887461511f,
242
278k
          0.0000000000000000,
243
278k
          0.2191868483885747f,
244
278k
      },
245
278k
      {
246
278k
          0.2500000000000000,
247
278k
          -0.1014005039375376f,
248
278k
          -0.4067007583026072f,
249
278k
          -0.2125574805828705f,
250
278k
          0.0000000000000000,
251
278k
          -0.0643507165794627f,
252
278k
          -0.4517556589999464f,
253
278k
          0.3046847507248840f,
254
278k
          0.3017929516615503f,
255
278k
          -0.4082482904638635f,
256
278k
          -0.1747866975480813f,
257
278k
          0.2110560104933581f,
258
278k
          -0.1426608480880734f,
259
278k
          -0.1381354035075829f,
260
278k
          -0.1743760259965108f,
261
278k
          0.1135498731499426f,
262
278k
      },
263
278k
      {
264
278k
          0.2500000000000000,
265
278k
          -0.1014005039375377f,
266
278k
          -0.1957439937204287f,
267
278k
          -0.1621205195722833f,
268
278k
          0.0000000000000000,
269
278k
          -0.0643507165794628f,
270
278k
          0.0074182263792444f,
271
278k
          0.2904801297290076f,
272
278k
          0.0952002265347505f,
273
278k
          0.0000000000000000,
274
278k
          0.3675398009862011f,
275
278k
          -0.4921585901373891f,
276
278k
          0.2462710772207514f,
277
278k
          -0.0794670660591026f,
278
278k
          0.3623817333531165f,
279
278k
          -0.4351904965232251f,
280
278k
      },
281
278k
      {
282
278k
          0.2500000000000000,
283
278k
          -0.1014005039375375f,
284
278k
          0.0000000000000000,
285
278k
          -0.4706702258572528f,
286
278k
          0.0000000000000000,
287
278k
          -0.0643507165794627f,
288
278k
          0.1107416575309343f,
289
278k
          0.0000000000000000,
290
278k
          -0.1627234014286617f,
291
278k
          0.0000000000000000,
292
278k
          0.0000000000000000,
293
278k
          0.0000000000000000,
294
278k
          0.1488339922711357f,
295
278k
          0.4972464710953509f,
296
278k
          0.2921026642334879f,
297
278k
          0.5550443808910661f,
298
278k
      },
299
278k
      {
300
278k
          0.2500000000000000,
301
278k
          -0.1014005039375377f,
302
278k
          0.1137907446044809f,
303
278k
          -0.1464291867126764f,
304
278k
          0.0000000000000000,
305
278k
          -0.0643507165794628f,
306
278k
          0.0829816309488205f,
307
278k
          -0.2388977352334460f,
308
278k
          -0.3531238544981630f,
309
278k
          -0.4082482904638630f,
310
278k
          0.4826689115059883f,
311
278k
          0.1741941265991622f,
312
278k
          -0.0476868035022925f,
313
278k
          0.1253805944856366f,
314
278k
          -0.4326608024727445f,
315
278k
          -0.2546827712406646f,
316
278k
      },
317
278k
      {
318
278k
          0.2500000000000000,
319
278k
          -0.1014005039375377f,
320
278k
          -0.4444481661973438f,
321
278k
          0.3085497062849487f,
322
278k
          0.0000000000000000,
323
278k
          -0.0643507165794628f,
324
278k
          0.1585450355183970f,
325
278k
          -0.5112616136592012f,
326
278k
          0.2579236279634129f,
327
278k
          0.0000000000000000,
328
278k
          -0.0812611176717504f,
329
278k
          -0.1856718091610990f,
330
278k
          -0.3416446842253373f,
331
278k
          0.3302282550303805f,
332
278k
          0.0702790691196282f,
333
278k
          -0.0741750459581023f,
334
278k
      },
335
278k
      {
336
278k
          0.2500000000000000,
337
278k
          -0.1014005039375376f,
338
278k
          -0.2929100136981264f,
339
278k
          0.0000000000000000,
340
278k
          0.0000000000000000,
341
278k
          -0.0643507165794627f,
342
278k
          0.3935103426921022f,
343
278k
          0.0657870154914254f,
344
278k
          0.0000000000000000,
345
278k
          0.4082482904638634f,
346
278k
          0.3078822139579031f,
347
278k
          0.3852501370925211f,
348
278k
          -0.0857401903551927f,
349
278k
          -0.4613374887461554f,
350
278k
          0.0000000000000000,
351
278k
          0.2191868483885728f,
352
278k
      },
353
278k
      {
354
278k
          0.2500000000000000,
355
278k
          -0.1014005039375376f,
356
278k
          -0.1137907446044814f,
357
278k
          -0.1464291867126654f,
358
278k
          0.0000000000000000,
359
278k
          -0.0643507165794627f,
360
278k
          0.0829816309488214f,
361
278k
          0.2388977352334547f,
362
278k
          -0.3531238544981624f,
363
278k
          0.4082482904638630f,
364
278k
          -0.4826689115059858f,
365
278k
          -0.1741941265991621f,
366
278k
          -0.0476868035022928f,
367
278k
          0.1253805944856431f,
368
278k
          -0.4326608024727457f,
369
278k
          -0.2546827712406641f,
370
278k
      },
371
278k
      {
372
278k
          0.2500000000000000,
373
278k
          -0.1014005039375374f,
374
278k
          0.0000000000000000,
375
278k
          0.4251149611657548f,
376
278k
          0.0000000000000000,
377
278k
          -0.0643507165794626f,
378
278k
          -0.4517556589999480f,
379
278k
          0.0000000000000000,
380
278k
          -0.6035859033230976f,
381
278k
          0.0000000000000000,
382
278k
          0.0000000000000000,
383
278k
          0.0000000000000000,
384
278k
          -0.1426608480880724f,
385
278k
          -0.1381354035075845f,
386
278k
          0.3487520519930227f,
387
278k
          0.1135498731499429f,
388
278k
      },
389
278k
  };
390
391
278k
  const HWY_CAPPED(float, 16) d;
392
836k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
557k
    auto scalar = Zero(d);
394
9.48M
    for (size_t j = 0; j < 16; j++) {
395
8.92M
      auto px = Set(d, pixels[j]);
396
8.92M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
8.92M
      scalar = MulAdd(px, basis, scalar);
398
8.92M
    }
399
557k
    Store(scalar, d, coeffs + i);
400
557k
  }
401
278k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
278k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
278k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
278k
      {
102
278k
          0.2500000000000000,
103
278k
          0.8769029297991420f,
104
278k
          0.0000000000000000,
105
278k
          0.0000000000000000,
106
278k
          0.0000000000000000,
107
278k
          -0.4105377591765233f,
108
278k
          0.0000000000000000,
109
278k
          0.0000000000000000,
110
278k
          0.0000000000000000,
111
278k
          0.0000000000000000,
112
278k
          0.0000000000000000,
113
278k
          0.0000000000000000,
114
278k
          0.0000000000000000,
115
278k
          0.0000000000000000,
116
278k
          0.0000000000000000,
117
278k
          0.0000000000000000,
118
278k
      },
119
278k
      {
120
278k
          0.2500000000000000,
121
278k
          0.2206518106944235f,
122
278k
          0.0000000000000000,
123
278k
          0.0000000000000000,
124
278k
          -0.7071067811865474f,
125
278k
          0.6235485373547691f,
126
278k
          0.0000000000000000,
127
278k
          0.0000000000000000,
128
278k
          0.0000000000000000,
129
278k
          0.0000000000000000,
130
278k
          0.0000000000000000,
131
278k
          0.0000000000000000,
132
278k
          0.0000000000000000,
133
278k
          0.0000000000000000,
134
278k
          0.0000000000000000,
135
278k
          0.0000000000000000,
136
278k
      },
137
278k
      {
138
278k
          0.2500000000000000,
139
278k
          -0.1014005039375376f,
140
278k
          0.4067007583026075f,
141
278k
          -0.2125574805828875f,
142
278k
          0.0000000000000000,
143
278k
          -0.0643507165794627f,
144
278k
          -0.4517556589999482f,
145
278k
          -0.3046847507248690f,
146
278k
          0.3017929516615495f,
147
278k
          0.4082482904638627f,
148
278k
          0.1747866975480809f,
149
278k
          -0.2110560104933578f,
150
278k
          -0.1426608480880726f,
151
278k
          -0.1381354035075859f,
152
278k
          -0.1743760259965107f,
153
278k
          0.1135498731499434f,
154
278k
      },
155
278k
      {
156
278k
          0.2500000000000000,
157
278k
          -0.1014005039375375f,
158
278k
          0.4444481661973445f,
159
278k
          0.3085497062849767f,
160
278k
          0.0000000000000000f,
161
278k
          -0.0643507165794627f,
162
278k
          0.1585450355184006f,
163
278k
          0.5112616136591823f,
164
278k
          0.2579236279634118f,
165
278k
          0.0000000000000000,
166
278k
          0.0812611176717539f,
167
278k
          0.1856718091610980f,
168
278k
          -0.3416446842253372f,
169
278k
          0.3302282550303788f,
170
278k
          0.0702790691196284f,
171
278k
          -0.0741750459581035f,
172
278k
      },
173
278k
      {
174
278k
          0.2500000000000000,
175
278k
          0.2206518106944236f,
176
278k
          0.0000000000000000,
177
278k
          0.0000000000000000,
178
278k
          0.7071067811865476f,
179
278k
          0.6235485373547694f,
180
278k
          0.0000000000000000,
181
278k
          0.0000000000000000,
182
278k
          0.0000000000000000,
183
278k
          0.0000000000000000,
184
278k
          0.0000000000000000,
185
278k
          0.0000000000000000,
186
278k
          0.0000000000000000,
187
278k
          0.0000000000000000,
188
278k
          0.0000000000000000,
189
278k
          0.0000000000000000,
190
278k
      },
191
278k
      {
192
278k
          0.2500000000000000,
193
278k
          -0.1014005039375378f,
194
278k
          0.0000000000000000,
195
278k
          0.4706702258572536f,
196
278k
          0.0000000000000000,
197
278k
          -0.0643507165794628f,
198
278k
          -0.0403851516082220f,
199
278k
          0.0000000000000000,
200
278k
          0.1627234014286620f,
201
278k
          0.0000000000000000,
202
278k
          0.0000000000000000,
203
278k
          0.0000000000000000,
204
278k
          0.7367497537172237f,
205
278k
          0.0875511500058708f,
206
278k
          -0.2921026642334881f,
207
278k
          0.1940289303259434f,
208
278k
      },
209
278k
      {
210
278k
          0.2500000000000000,
211
278k
          -0.1014005039375377f,
212
278k
          0.1957439937204294f,
213
278k
          -0.1621205195722993f,
214
278k
          0.0000000000000000,
215
278k
          -0.0643507165794628f,
216
278k
          0.0074182263792424f,
217
278k
          -0.2904801297289980f,
218
278k
          0.0952002265347504f,
219
278k
          0.0000000000000000,
220
278k
          -0.3675398009862027f,
221
278k
          0.4921585901373873f,
222
278k
          0.2462710772207515f,
223
278k
          -0.0794670660590957f,
224
278k
          0.3623817333531167f,
225
278k
          -0.4351904965232280f,
226
278k
      },
227
278k
      {
228
278k
          0.2500000000000000,
229
278k
          -0.1014005039375376f,
230
278k
          0.2929100136981264f,
231
278k
          0.0000000000000000,
232
278k
          0.0000000000000000,
233
278k
          -0.0643507165794627f,
234
278k
          0.3935103426921017f,
235
278k
          -0.0657870154914280f,
236
278k
          0.0000000000000000,
237
278k
          -0.4082482904638628f,
238
278k
          -0.3078822139579090f,
239
278k
          -0.3852501370925192f,
240
278k
          -0.0857401903551931f,
241
278k
          -0.4613374887461511f,
242
278k
          0.0000000000000000,
243
278k
          0.2191868483885747f,
244
278k
      },
245
278k
      {
246
278k
          0.2500000000000000,
247
278k
          -0.1014005039375376f,
248
278k
          -0.4067007583026072f,
249
278k
          -0.2125574805828705f,
250
278k
          0.0000000000000000,
251
278k
          -0.0643507165794627f,
252
278k
          -0.4517556589999464f,
253
278k
          0.3046847507248840f,
254
278k
          0.3017929516615503f,
255
278k
          -0.4082482904638635f,
256
278k
          -0.1747866975480813f,
257
278k
          0.2110560104933581f,
258
278k
          -0.1426608480880734f,
259
278k
          -0.1381354035075829f,
260
278k
          -0.1743760259965108f,
261
278k
          0.1135498731499426f,
262
278k
      },
263
278k
      {
264
278k
          0.2500000000000000,
265
278k
          -0.1014005039375377f,
266
278k
          -0.1957439937204287f,
267
278k
          -0.1621205195722833f,
268
278k
          0.0000000000000000,
269
278k
          -0.0643507165794628f,
270
278k
          0.0074182263792444f,
271
278k
          0.2904801297290076f,
272
278k
          0.0952002265347505f,
273
278k
          0.0000000000000000,
274
278k
          0.3675398009862011f,
275
278k
          -0.4921585901373891f,
276
278k
          0.2462710772207514f,
277
278k
          -0.0794670660591026f,
278
278k
          0.3623817333531165f,
279
278k
          -0.4351904965232251f,
280
278k
      },
281
278k
      {
282
278k
          0.2500000000000000,
283
278k
          -0.1014005039375375f,
284
278k
          0.0000000000000000,
285
278k
          -0.4706702258572528f,
286
278k
          0.0000000000000000,
287
278k
          -0.0643507165794627f,
288
278k
          0.1107416575309343f,
289
278k
          0.0000000000000000,
290
278k
          -0.1627234014286617f,
291
278k
          0.0000000000000000,
292
278k
          0.0000000000000000,
293
278k
          0.0000000000000000,
294
278k
          0.1488339922711357f,
295
278k
          0.4972464710953509f,
296
278k
          0.2921026642334879f,
297
278k
          0.5550443808910661f,
298
278k
      },
299
278k
      {
300
278k
          0.2500000000000000,
301
278k
          -0.1014005039375377f,
302
278k
          0.1137907446044809f,
303
278k
          -0.1464291867126764f,
304
278k
          0.0000000000000000,
305
278k
          -0.0643507165794628f,
306
278k
          0.0829816309488205f,
307
278k
          -0.2388977352334460f,
308
278k
          -0.3531238544981630f,
309
278k
          -0.4082482904638630f,
310
278k
          0.4826689115059883f,
311
278k
          0.1741941265991622f,
312
278k
          -0.0476868035022925f,
313
278k
          0.1253805944856366f,
314
278k
          -0.4326608024727445f,
315
278k
          -0.2546827712406646f,
316
278k
      },
317
278k
      {
318
278k
          0.2500000000000000,
319
278k
          -0.1014005039375377f,
320
278k
          -0.4444481661973438f,
321
278k
          0.3085497062849487f,
322
278k
          0.0000000000000000,
323
278k
          -0.0643507165794628f,
324
278k
          0.1585450355183970f,
325
278k
          -0.5112616136592012f,
326
278k
          0.2579236279634129f,
327
278k
          0.0000000000000000,
328
278k
          -0.0812611176717504f,
329
278k
          -0.1856718091610990f,
330
278k
          -0.3416446842253373f,
331
278k
          0.3302282550303805f,
332
278k
          0.0702790691196282f,
333
278k
          -0.0741750459581023f,
334
278k
      },
335
278k
      {
336
278k
          0.2500000000000000,
337
278k
          -0.1014005039375376f,
338
278k
          -0.2929100136981264f,
339
278k
          0.0000000000000000,
340
278k
          0.0000000000000000,
341
278k
          -0.0643507165794627f,
342
278k
          0.3935103426921022f,
343
278k
          0.0657870154914254f,
344
278k
          0.0000000000000000,
345
278k
          0.4082482904638634f,
346
278k
          0.3078822139579031f,
347
278k
          0.3852501370925211f,
348
278k
          -0.0857401903551927f,
349
278k
          -0.4613374887461554f,
350
278k
          0.0000000000000000,
351
278k
          0.2191868483885728f,
352
278k
      },
353
278k
      {
354
278k
          0.2500000000000000,
355
278k
          -0.1014005039375376f,
356
278k
          -0.1137907446044814f,
357
278k
          -0.1464291867126654f,
358
278k
          0.0000000000000000,
359
278k
          -0.0643507165794627f,
360
278k
          0.0829816309488214f,
361
278k
          0.2388977352334547f,
362
278k
          -0.3531238544981624f,
363
278k
          0.4082482904638630f,
364
278k
          -0.4826689115059858f,
365
278k
          -0.1741941265991621f,
366
278k
          -0.0476868035022928f,
367
278k
          0.1253805944856431f,
368
278k
          -0.4326608024727457f,
369
278k
          -0.2546827712406641f,
370
278k
      },
371
278k
      {
372
278k
          0.2500000000000000,
373
278k
          -0.1014005039375374f,
374
278k
          0.0000000000000000,
375
278k
          0.4251149611657548f,
376
278k
          0.0000000000000000,
377
278k
          -0.0643507165794626f,
378
278k
          -0.4517556589999480f,
379
278k
          0.0000000000000000,
380
278k
          -0.6035859033230976f,
381
278k
          0.0000000000000000,
382
278k
          0.0000000000000000,
383
278k
          0.0000000000000000,
384
278k
          -0.1426608480880724f,
385
278k
          -0.1381354035075845f,
386
278k
          0.3487520519930227f,
387
278k
          0.1135498731499429f,
388
278k
      },
389
278k
  };
390
391
278k
  const HWY_CAPPED(float, 16) d;
392
836k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
557k
    auto scalar = Zero(d);
394
9.48M
    for (size_t j = 0; j < 16; j++) {
395
8.92M
      auto px = Set(d, pixels[j]);
396
8.92M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
8.92M
      scalar = MulAdd(px, basis, scalar);
398
8.92M
    }
399
557k
    Store(scalar, d, coeffs + i);
400
557k
  }
401
278k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
45.9M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
45.9M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
45.9M
      {
102
45.9M
          0.2500000000000000,
103
45.9M
          0.8769029297991420f,
104
45.9M
          0.0000000000000000,
105
45.9M
          0.0000000000000000,
106
45.9M
          0.0000000000000000,
107
45.9M
          -0.4105377591765233f,
108
45.9M
          0.0000000000000000,
109
45.9M
          0.0000000000000000,
110
45.9M
          0.0000000000000000,
111
45.9M
          0.0000000000000000,
112
45.9M
          0.0000000000000000,
113
45.9M
          0.0000000000000000,
114
45.9M
          0.0000000000000000,
115
45.9M
          0.0000000000000000,
116
45.9M
          0.0000000000000000,
117
45.9M
          0.0000000000000000,
118
45.9M
      },
119
45.9M
      {
120
45.9M
          0.2500000000000000,
121
45.9M
          0.2206518106944235f,
122
45.9M
          0.0000000000000000,
123
45.9M
          0.0000000000000000,
124
45.9M
          -0.7071067811865474f,
125
45.9M
          0.6235485373547691f,
126
45.9M
          0.0000000000000000,
127
45.9M
          0.0000000000000000,
128
45.9M
          0.0000000000000000,
129
45.9M
          0.0000000000000000,
130
45.9M
          0.0000000000000000,
131
45.9M
          0.0000000000000000,
132
45.9M
          0.0000000000000000,
133
45.9M
          0.0000000000000000,
134
45.9M
          0.0000000000000000,
135
45.9M
          0.0000000000000000,
136
45.9M
      },
137
45.9M
      {
138
45.9M
          0.2500000000000000,
139
45.9M
          -0.1014005039375376f,
140
45.9M
          0.4067007583026075f,
141
45.9M
          -0.2125574805828875f,
142
45.9M
          0.0000000000000000,
143
45.9M
          -0.0643507165794627f,
144
45.9M
          -0.4517556589999482f,
145
45.9M
          -0.3046847507248690f,
146
45.9M
          0.3017929516615495f,
147
45.9M
          0.4082482904638627f,
148
45.9M
          0.1747866975480809f,
149
45.9M
          -0.2110560104933578f,
150
45.9M
          -0.1426608480880726f,
151
45.9M
          -0.1381354035075859f,
152
45.9M
          -0.1743760259965107f,
153
45.9M
          0.1135498731499434f,
154
45.9M
      },
155
45.9M
      {
156
45.9M
          0.2500000000000000,
157
45.9M
          -0.1014005039375375f,
158
45.9M
          0.4444481661973445f,
159
45.9M
          0.3085497062849767f,
160
45.9M
          0.0000000000000000f,
161
45.9M
          -0.0643507165794627f,
162
45.9M
          0.1585450355184006f,
163
45.9M
          0.5112616136591823f,
164
45.9M
          0.2579236279634118f,
165
45.9M
          0.0000000000000000,
166
45.9M
          0.0812611176717539f,
167
45.9M
          0.1856718091610980f,
168
45.9M
          -0.3416446842253372f,
169
45.9M
          0.3302282550303788f,
170
45.9M
          0.0702790691196284f,
171
45.9M
          -0.0741750459581035f,
172
45.9M
      },
173
45.9M
      {
174
45.9M
          0.2500000000000000,
175
45.9M
          0.2206518106944236f,
176
45.9M
          0.0000000000000000,
177
45.9M
          0.0000000000000000,
178
45.9M
          0.7071067811865476f,
179
45.9M
          0.6235485373547694f,
180
45.9M
          0.0000000000000000,
181
45.9M
          0.0000000000000000,
182
45.9M
          0.0000000000000000,
183
45.9M
          0.0000000000000000,
184
45.9M
          0.0000000000000000,
185
45.9M
          0.0000000000000000,
186
45.9M
          0.0000000000000000,
187
45.9M
          0.0000000000000000,
188
45.9M
          0.0000000000000000,
189
45.9M
          0.0000000000000000,
190
45.9M
      },
191
45.9M
      {
192
45.9M
          0.2500000000000000,
193
45.9M
          -0.1014005039375378f,
194
45.9M
          0.0000000000000000,
195
45.9M
          0.4706702258572536f,
196
45.9M
          0.0000000000000000,
197
45.9M
          -0.0643507165794628f,
198
45.9M
          -0.0403851516082220f,
199
45.9M
          0.0000000000000000,
200
45.9M
          0.1627234014286620f,
201
45.9M
          0.0000000000000000,
202
45.9M
          0.0000000000000000,
203
45.9M
          0.0000000000000000,
204
45.9M
          0.7367497537172237f,
205
45.9M
          0.0875511500058708f,
206
45.9M
          -0.2921026642334881f,
207
45.9M
          0.1940289303259434f,
208
45.9M
      },
209
45.9M
      {
210
45.9M
          0.2500000000000000,
211
45.9M
          -0.1014005039375377f,
212
45.9M
          0.1957439937204294f,
213
45.9M
          -0.1621205195722993f,
214
45.9M
          0.0000000000000000,
215
45.9M
          -0.0643507165794628f,
216
45.9M
          0.0074182263792424f,
217
45.9M
          -0.2904801297289980f,
218
45.9M
          0.0952002265347504f,
219
45.9M
          0.0000000000000000,
220
45.9M
          -0.3675398009862027f,
221
45.9M
          0.4921585901373873f,
222
45.9M
          0.2462710772207515f,
223
45.9M
          -0.0794670660590957f,
224
45.9M
          0.3623817333531167f,
225
45.9M
          -0.4351904965232280f,
226
45.9M
      },
227
45.9M
      {
228
45.9M
          0.2500000000000000,
229
45.9M
          -0.1014005039375376f,
230
45.9M
          0.2929100136981264f,
231
45.9M
          0.0000000000000000,
232
45.9M
          0.0000000000000000,
233
45.9M
          -0.0643507165794627f,
234
45.9M
          0.3935103426921017f,
235
45.9M
          -0.0657870154914280f,
236
45.9M
          0.0000000000000000,
237
45.9M
          -0.4082482904638628f,
238
45.9M
          -0.3078822139579090f,
239
45.9M
          -0.3852501370925192f,
240
45.9M
          -0.0857401903551931f,
241
45.9M
          -0.4613374887461511f,
242
45.9M
          0.0000000000000000,
243
45.9M
          0.2191868483885747f,
244
45.9M
      },
245
45.9M
      {
246
45.9M
          0.2500000000000000,
247
45.9M
          -0.1014005039375376f,
248
45.9M
          -0.4067007583026072f,
249
45.9M
          -0.2125574805828705f,
250
45.9M
          0.0000000000000000,
251
45.9M
          -0.0643507165794627f,
252
45.9M
          -0.4517556589999464f,
253
45.9M
          0.3046847507248840f,
254
45.9M
          0.3017929516615503f,
255
45.9M
          -0.4082482904638635f,
256
45.9M
          -0.1747866975480813f,
257
45.9M
          0.2110560104933581f,
258
45.9M
          -0.1426608480880734f,
259
45.9M
          -0.1381354035075829f,
260
45.9M
          -0.1743760259965108f,
261
45.9M
          0.1135498731499426f,
262
45.9M
      },
263
45.9M
      {
264
45.9M
          0.2500000000000000,
265
45.9M
          -0.1014005039375377f,
266
45.9M
          -0.1957439937204287f,
267
45.9M
          -0.1621205195722833f,
268
45.9M
          0.0000000000000000,
269
45.9M
          -0.0643507165794628f,
270
45.9M
          0.0074182263792444f,
271
45.9M
          0.2904801297290076f,
272
45.9M
          0.0952002265347505f,
273
45.9M
          0.0000000000000000,
274
45.9M
          0.3675398009862011f,
275
45.9M
          -0.4921585901373891f,
276
45.9M
          0.2462710772207514f,
277
45.9M
          -0.0794670660591026f,
278
45.9M
          0.3623817333531165f,
279
45.9M
          -0.4351904965232251f,
280
45.9M
      },
281
45.9M
      {
282
45.9M
          0.2500000000000000,
283
45.9M
          -0.1014005039375375f,
284
45.9M
          0.0000000000000000,
285
45.9M
          -0.4706702258572528f,
286
45.9M
          0.0000000000000000,
287
45.9M
          -0.0643507165794627f,
288
45.9M
          0.1107416575309343f,
289
45.9M
          0.0000000000000000,
290
45.9M
          -0.1627234014286617f,
291
45.9M
          0.0000000000000000,
292
45.9M
          0.0000000000000000,
293
45.9M
          0.0000000000000000,
294
45.9M
          0.1488339922711357f,
295
45.9M
          0.4972464710953509f,
296
45.9M
          0.2921026642334879f,
297
45.9M
          0.5550443808910661f,
298
45.9M
      },
299
45.9M
      {
300
45.9M
          0.2500000000000000,
301
45.9M
          -0.1014005039375377f,
302
45.9M
          0.1137907446044809f,
303
45.9M
          -0.1464291867126764f,
304
45.9M
          0.0000000000000000,
305
45.9M
          -0.0643507165794628f,
306
45.9M
          0.0829816309488205f,
307
45.9M
          -0.2388977352334460f,
308
45.9M
          -0.3531238544981630f,
309
45.9M
          -0.4082482904638630f,
310
45.9M
          0.4826689115059883f,
311
45.9M
          0.1741941265991622f,
312
45.9M
          -0.0476868035022925f,
313
45.9M
          0.1253805944856366f,
314
45.9M
          -0.4326608024727445f,
315
45.9M
          -0.2546827712406646f,
316
45.9M
      },
317
45.9M
      {
318
45.9M
          0.2500000000000000,
319
45.9M
          -0.1014005039375377f,
320
45.9M
          -0.4444481661973438f,
321
45.9M
          0.3085497062849487f,
322
45.9M
          0.0000000000000000,
323
45.9M
          -0.0643507165794628f,
324
45.9M
          0.1585450355183970f,
325
45.9M
          -0.5112616136592012f,
326
45.9M
          0.2579236279634129f,
327
45.9M
          0.0000000000000000,
328
45.9M
          -0.0812611176717504f,
329
45.9M
          -0.1856718091610990f,
330
45.9M
          -0.3416446842253373f,
331
45.9M
          0.3302282550303805f,
332
45.9M
          0.0702790691196282f,
333
45.9M
          -0.0741750459581023f,
334
45.9M
      },
335
45.9M
      {
336
45.9M
          0.2500000000000000,
337
45.9M
          -0.1014005039375376f,
338
45.9M
          -0.2929100136981264f,
339
45.9M
          0.0000000000000000,
340
45.9M
          0.0000000000000000,
341
45.9M
          -0.0643507165794627f,
342
45.9M
          0.3935103426921022f,
343
45.9M
          0.0657870154914254f,
344
45.9M
          0.0000000000000000,
345
45.9M
          0.4082482904638634f,
346
45.9M
          0.3078822139579031f,
347
45.9M
          0.3852501370925211f,
348
45.9M
          -0.0857401903551927f,
349
45.9M
          -0.4613374887461554f,
350
45.9M
          0.0000000000000000,
351
45.9M
          0.2191868483885728f,
352
45.9M
      },
353
45.9M
      {
354
45.9M
          0.2500000000000000,
355
45.9M
          -0.1014005039375376f,
356
45.9M
          -0.1137907446044814f,
357
45.9M
          -0.1464291867126654f,
358
45.9M
          0.0000000000000000,
359
45.9M
          -0.0643507165794627f,
360
45.9M
          0.0829816309488214f,
361
45.9M
          0.2388977352334547f,
362
45.9M
          -0.3531238544981624f,
363
45.9M
          0.4082482904638630f,
364
45.9M
          -0.4826689115059858f,
365
45.9M
          -0.1741941265991621f,
366
45.9M
          -0.0476868035022928f,
367
45.9M
          0.1253805944856431f,
368
45.9M
          -0.4326608024727457f,
369
45.9M
          -0.2546827712406641f,
370
45.9M
      },
371
45.9M
      {
372
45.9M
          0.2500000000000000,
373
45.9M
          -0.1014005039375374f,
374
45.9M
          0.0000000000000000,
375
45.9M
          0.4251149611657548f,
376
45.9M
          0.0000000000000000,
377
45.9M
          -0.0643507165794626f,
378
45.9M
          -0.4517556589999480f,
379
45.9M
          0.0000000000000000,
380
45.9M
          -0.6035859033230976f,
381
45.9M
          0.0000000000000000,
382
45.9M
          0.0000000000000000,
383
45.9M
          0.0000000000000000,
384
45.9M
          -0.1426608480880724f,
385
45.9M
          -0.1381354035075845f,
386
45.9M
          0.3487520519930227f,
387
45.9M
          0.1135498731499429f,
388
45.9M
      },
389
45.9M
  };
390
391
45.9M
  const HWY_CAPPED(float, 16) d;
392
137M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
91.9M
    auto scalar = Zero(d);
394
1.56G
    for (size_t j = 0; j < 16; j++) {
395
1.47G
      auto px = Set(d, pixels[j]);
396
1.47G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.47G
      scalar = MulAdd(px, basis, scalar);
398
1.47G
    }
399
91.9M
    Store(scalar, d, coeffs + i);
400
91.9M
  }
401
45.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
46.5M
                            float* JXL_RESTRICT coefficients) {
411
46.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
46.5M
  size_t afv_x = afv_kind & 1;
413
46.5M
  size_t afv_y = afv_kind / 2;
414
46.5M
  HWY_ALIGN float block[4 * 8] = {};
415
232M
  for (size_t iy = 0; iy < 4; iy++) {
416
930M
    for (size_t ix = 0; ix < 4; ix++) {
417
744M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
744M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
744M
    }
420
186M
  }
421
  // AFV coefficients in (even, even) positions.
422
46.5M
  HWY_ALIGN float coeff[4 * 4];
423
46.5M
  AFVDCT4x4(block, coeff);
424
232M
  for (size_t iy = 0; iy < 4; iy++) {
425
930M
    for (size_t ix = 0; ix < 4; ix++) {
426
744M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
744M
    }
428
186M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
46.5M
  ComputeScaledDCT<4, 4>()(
431
46.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
46.5M
              pixels_stride),
433
46.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
232M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.67G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.48G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.48G
    }
439
186M
  }
440
  // 4x8 DCT of the other half of the block.
441
46.5M
  ComputeScaledDCT<4, 8>()(
442
46.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
46.5M
      block, scratch_space);
444
232M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.67G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.48G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.48G
    }
448
186M
  }
449
46.5M
  float block00 = coefficients[0] * 0.25f;
450
46.5M
  float block01 = coefficients[1];
451
46.5M
  float block10 = coefficients[8];
452
46.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
46.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
46.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
46.5M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
95.2k
                            float* JXL_RESTRICT coefficients) {
411
95.2k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
95.2k
  size_t afv_x = afv_kind & 1;
413
95.2k
  size_t afv_y = afv_kind / 2;
414
95.2k
  HWY_ALIGN float block[4 * 8] = {};
415
476k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.90M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.52M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.52M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.52M
    }
420
381k
  }
421
  // AFV coefficients in (even, even) positions.
422
95.2k
  HWY_ALIGN float coeff[4 * 4];
423
95.2k
  AFVDCT4x4(block, coeff);
424
476k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.90M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.52M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.52M
    }
428
381k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
95.2k
  ComputeScaledDCT<4, 4>()(
431
95.2k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
95.2k
              pixels_stride),
433
95.2k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
476k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.42M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.04M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.04M
    }
439
381k
  }
440
  // 4x8 DCT of the other half of the block.
441
95.2k
  ComputeScaledDCT<4, 8>()(
442
95.2k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
95.2k
      block, scratch_space);
444
476k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.42M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.04M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.04M
    }
448
381k
  }
449
95.2k
  float block00 = coefficients[0] * 0.25f;
450
95.2k
  float block01 = coefficients[1];
451
95.2k
  float block10 = coefficients[8];
452
95.2k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
95.2k
  coefficients[1] = (block00 - block01) * 0.5f;
454
95.2k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
95.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
51.4k
                            float* JXL_RESTRICT coefficients) {
411
51.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
51.4k
  size_t afv_x = afv_kind & 1;
413
51.4k
  size_t afv_y = afv_kind / 2;
414
51.4k
  HWY_ALIGN float block[4 * 8] = {};
415
257k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.02M
    for (size_t ix = 0; ix < 4; ix++) {
417
823k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
823k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
823k
    }
420
205k
  }
421
  // AFV coefficients in (even, even) positions.
422
51.4k
  HWY_ALIGN float coeff[4 * 4];
423
51.4k
  AFVDCT4x4(block, coeff);
424
257k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.02M
    for (size_t ix = 0; ix < 4; ix++) {
426
823k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
823k
    }
428
205k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
51.4k
  ComputeScaledDCT<4, 4>()(
431
51.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
51.4k
              pixels_stride),
433
51.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
257k
  for (size_t iy = 0; iy < 4; iy++) {
436
1.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.64M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.64M
    }
439
205k
  }
440
  // 4x8 DCT of the other half of the block.
441
51.4k
  ComputeScaledDCT<4, 8>()(
442
51.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
51.4k
      block, scratch_space);
444
257k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.64M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.64M
    }
448
205k
  }
449
51.4k
  float block00 = coefficients[0] * 0.25f;
450
51.4k
  float block01 = coefficients[1];
451
51.4k
  float block10 = coefficients[8];
452
51.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
51.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
51.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
51.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
65.1k
                            float* JXL_RESTRICT coefficients) {
411
65.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
65.1k
  size_t afv_x = afv_kind & 1;
413
65.1k
  size_t afv_y = afv_kind / 2;
414
65.1k
  HWY_ALIGN float block[4 * 8] = {};
415
325k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.04M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.04M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.04M
    }
420
260k
  }
421
  // AFV coefficients in (even, even) positions.
422
65.1k
  HWY_ALIGN float coeff[4 * 4];
423
65.1k
  AFVDCT4x4(block, coeff);
424
325k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.04M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.04M
    }
428
260k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
65.1k
  ComputeScaledDCT<4, 4>()(
431
65.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
65.1k
              pixels_stride),
433
65.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
325k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.08M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.08M
    }
439
260k
  }
440
  // 4x8 DCT of the other half of the block.
441
65.1k
  ComputeScaledDCT<4, 8>()(
442
65.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
65.1k
      block, scratch_space);
444
325k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.08M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.08M
    }
448
260k
  }
449
65.1k
  float block00 = coefficients[0] * 0.25f;
450
65.1k
  float block01 = coefficients[1];
451
65.1k
  float block10 = coefficients[8];
452
65.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
65.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
65.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
65.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
67.1k
                            float* JXL_RESTRICT coefficients) {
411
67.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
67.1k
  size_t afv_x = afv_kind & 1;
413
67.1k
  size_t afv_y = afv_kind / 2;
414
67.1k
  HWY_ALIGN float block[4 * 8] = {};
415
335k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.34M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.07M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.07M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.07M
    }
420
268k
  }
421
  // AFV coefficients in (even, even) positions.
422
67.1k
  HWY_ALIGN float coeff[4 * 4];
423
67.1k
  AFVDCT4x4(block, coeff);
424
335k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.34M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.07M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.07M
    }
428
268k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
67.1k
  ComputeScaledDCT<4, 4>()(
431
67.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
67.1k
              pixels_stride),
433
67.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
335k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.41M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.14M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.14M
    }
439
268k
  }
440
  // 4x8 DCT of the other half of the block.
441
67.1k
  ComputeScaledDCT<4, 8>()(
442
67.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
67.1k
      block, scratch_space);
444
335k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.41M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.14M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.14M
    }
448
268k
  }
449
67.1k
  float block00 = coefficients[0] * 0.25f;
450
67.1k
  float block01 = coefficients[1];
451
67.1k
  float block10 = coefficients[8];
452
67.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
67.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
67.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
67.1k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
95.2k
                            float* JXL_RESTRICT coefficients) {
411
95.2k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
95.2k
  size_t afv_x = afv_kind & 1;
413
95.2k
  size_t afv_y = afv_kind / 2;
414
95.2k
  HWY_ALIGN float block[4 * 8] = {};
415
476k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.90M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.52M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.52M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.52M
    }
420
381k
  }
421
  // AFV coefficients in (even, even) positions.
422
95.2k
  HWY_ALIGN float coeff[4 * 4];
423
95.2k
  AFVDCT4x4(block, coeff);
424
476k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.90M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.52M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.52M
    }
428
381k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
95.2k
  ComputeScaledDCT<4, 4>()(
431
95.2k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
95.2k
              pixels_stride),
433
95.2k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
476k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.42M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.04M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.04M
    }
439
381k
  }
440
  // 4x8 DCT of the other half of the block.
441
95.2k
  ComputeScaledDCT<4, 8>()(
442
95.2k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
95.2k
      block, scratch_space);
444
476k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.42M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.04M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.04M
    }
448
381k
  }
449
95.2k
  float block00 = coefficients[0] * 0.25f;
450
95.2k
  float block01 = coefficients[1];
451
95.2k
  float block10 = coefficients[8];
452
95.2k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
95.2k
  coefficients[1] = (block00 - block01) * 0.5f;
454
95.2k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
95.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
51.4k
                            float* JXL_RESTRICT coefficients) {
411
51.4k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
51.4k
  size_t afv_x = afv_kind & 1;
413
51.4k
  size_t afv_y = afv_kind / 2;
414
51.4k
  HWY_ALIGN float block[4 * 8] = {};
415
257k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.02M
    for (size_t ix = 0; ix < 4; ix++) {
417
823k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
823k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
823k
    }
420
205k
  }
421
  // AFV coefficients in (even, even) positions.
422
51.4k
  HWY_ALIGN float coeff[4 * 4];
423
51.4k
  AFVDCT4x4(block, coeff);
424
257k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.02M
    for (size_t ix = 0; ix < 4; ix++) {
426
823k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
823k
    }
428
205k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
51.4k
  ComputeScaledDCT<4, 4>()(
431
51.4k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
51.4k
              pixels_stride),
433
51.4k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
257k
  for (size_t iy = 0; iy < 4; iy++) {
436
1.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.64M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.64M
    }
439
205k
  }
440
  // 4x8 DCT of the other half of the block.
441
51.4k
  ComputeScaledDCT<4, 8>()(
442
51.4k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
51.4k
      block, scratch_space);
444
257k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.64M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.64M
    }
448
205k
  }
449
51.4k
  float block00 = coefficients[0] * 0.25f;
450
51.4k
  float block01 = coefficients[1];
451
51.4k
  float block10 = coefficients[8];
452
51.4k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
51.4k
  coefficients[1] = (block00 - block01) * 0.5f;
454
51.4k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
51.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
65.1k
                            float* JXL_RESTRICT coefficients) {
411
65.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
65.1k
  size_t afv_x = afv_kind & 1;
413
65.1k
  size_t afv_y = afv_kind / 2;
414
65.1k
  HWY_ALIGN float block[4 * 8] = {};
415
325k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.04M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.04M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.04M
    }
420
260k
  }
421
  // AFV coefficients in (even, even) positions.
422
65.1k
  HWY_ALIGN float coeff[4 * 4];
423
65.1k
  AFVDCT4x4(block, coeff);
424
325k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.04M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.04M
    }
428
260k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
65.1k
  ComputeScaledDCT<4, 4>()(
431
65.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
65.1k
              pixels_stride),
433
65.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
325k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.08M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.08M
    }
439
260k
  }
440
  // 4x8 DCT of the other half of the block.
441
65.1k
  ComputeScaledDCT<4, 8>()(
442
65.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
65.1k
      block, scratch_space);
444
325k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.08M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.08M
    }
448
260k
  }
449
65.1k
  float block00 = coefficients[0] * 0.25f;
450
65.1k
  float block01 = coefficients[1];
451
65.1k
  float block10 = coefficients[8];
452
65.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
65.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
65.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
65.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
67.1k
                            float* JXL_RESTRICT coefficients) {
411
67.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
67.1k
  size_t afv_x = afv_kind & 1;
413
67.1k
  size_t afv_y = afv_kind / 2;
414
67.1k
  HWY_ALIGN float block[4 * 8] = {};
415
335k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.34M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.07M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.07M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.07M
    }
420
268k
  }
421
  // AFV coefficients in (even, even) positions.
422
67.1k
  HWY_ALIGN float coeff[4 * 4];
423
67.1k
  AFVDCT4x4(block, coeff);
424
335k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.34M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.07M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.07M
    }
428
268k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
67.1k
  ComputeScaledDCT<4, 4>()(
431
67.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
67.1k
              pixels_stride),
433
67.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
335k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.41M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.14M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.14M
    }
439
268k
  }
440
  // 4x8 DCT of the other half of the block.
441
67.1k
  ComputeScaledDCT<4, 8>()(
442
67.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
67.1k
      block, scratch_space);
444
335k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.41M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.14M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.14M
    }
448
268k
  }
449
67.1k
  float block00 = coefficients[0] * 0.25f;
450
67.1k
  float block01 = coefficients[1];
451
67.1k
  float block10 = coefficients[8];
452
67.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
67.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
67.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
67.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.4M
                            float* JXL_RESTRICT coefficients) {
411
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.4M
  size_t afv_x = afv_kind & 1;
413
11.4M
  size_t afv_y = afv_kind / 2;
414
11.4M
  HWY_ALIGN float block[4 * 8] = {};
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
183M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
183M
    }
420
45.9M
  }
421
  // AFV coefficients in (even, even) positions.
422
11.4M
  HWY_ALIGN float coeff[4 * 4];
423
11.4M
  AFVDCT4x4(block, coeff);
424
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
229M
    for (size_t ix = 0; ix < 4; ix++) {
426
183M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
183M
    }
428
45.9M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.4M
  ComputeScaledDCT<4, 4>()(
431
11.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.4M
              pixels_stride),
433
11.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
413M
    for (size_t ix = 0; ix < 8; ix++) {
437
367M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
367M
    }
439
45.9M
  }
440
  // 4x8 DCT of the other half of the block.
441
11.4M
  ComputeScaledDCT<4, 8>()(
442
11.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.4M
      block, scratch_space);
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
367M
    }
448
45.9M
  }
449
11.4M
  float block00 = coefficients[0] * 0.25f;
450
11.4M
  float block01 = coefficients[1];
451
11.4M
  float block10 = coefficients[8];
452
11.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.4M
                            float* JXL_RESTRICT coefficients) {
411
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.4M
  size_t afv_x = afv_kind & 1;
413
11.4M
  size_t afv_y = afv_kind / 2;
414
11.4M
  HWY_ALIGN float block[4 * 8] = {};
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
183M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
183M
    }
420
45.9M
  }
421
  // AFV coefficients in (even, even) positions.
422
11.4M
  HWY_ALIGN float coeff[4 * 4];
423
11.4M
  AFVDCT4x4(block, coeff);
424
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
229M
    for (size_t ix = 0; ix < 4; ix++) {
426
183M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
183M
    }
428
45.9M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.4M
  ComputeScaledDCT<4, 4>()(
431
11.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.4M
              pixels_stride),
433
11.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
413M
    for (size_t ix = 0; ix < 8; ix++) {
437
367M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
367M
    }
439
45.9M
  }
440
  // 4x8 DCT of the other half of the block.
441
11.4M
  ComputeScaledDCT<4, 8>()(
442
11.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.4M
      block, scratch_space);
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
367M
    }
448
45.9M
  }
449
11.4M
  float block00 = coefficients[0] * 0.25f;
450
11.4M
  float block01 = coefficients[1];
451
11.4M
  float block10 = coefficients[8];
452
11.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.4M
                            float* JXL_RESTRICT coefficients) {
411
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.4M
  size_t afv_x = afv_kind & 1;
413
11.4M
  size_t afv_y = afv_kind / 2;
414
11.4M
  HWY_ALIGN float block[4 * 8] = {};
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
183M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
183M
    }
420
45.9M
  }
421
  // AFV coefficients in (even, even) positions.
422
11.4M
  HWY_ALIGN float coeff[4 * 4];
423
11.4M
  AFVDCT4x4(block, coeff);
424
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
229M
    for (size_t ix = 0; ix < 4; ix++) {
426
183M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
183M
    }
428
45.9M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.4M
  ComputeScaledDCT<4, 4>()(
431
11.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.4M
              pixels_stride),
433
11.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
413M
    for (size_t ix = 0; ix < 8; ix++) {
437
367M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
367M
    }
439
45.9M
  }
440
  // 4x8 DCT of the other half of the block.
441
11.4M
  ComputeScaledDCT<4, 8>()(
442
11.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.4M
      block, scratch_space);
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
367M
    }
448
45.9M
  }
449
11.4M
  float block00 = coefficients[0] * 0.25f;
450
11.4M
  float block01 = coefficients[1];
451
11.4M
  float block10 = coefficients[8];
452
11.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.4M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
11.4M
                            float* JXL_RESTRICT coefficients) {
411
11.4M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
11.4M
  size_t afv_x = afv_kind & 1;
413
11.4M
  size_t afv_y = afv_kind / 2;
414
11.4M
  HWY_ALIGN float block[4 * 8] = {};
415
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
229M
    for (size_t ix = 0; ix < 4; ix++) {
417
183M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
183M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
183M
    }
420
45.9M
  }
421
  // AFV coefficients in (even, even) positions.
422
11.4M
  HWY_ALIGN float coeff[4 * 4];
423
11.4M
  AFVDCT4x4(block, coeff);
424
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
229M
    for (size_t ix = 0; ix < 4; ix++) {
426
183M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
183M
    }
428
45.9M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
11.4M
  ComputeScaledDCT<4, 4>()(
431
11.4M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
11.4M
              pixels_stride),
433
11.4M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
413M
    for (size_t ix = 0; ix < 8; ix++) {
437
367M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
367M
    }
439
45.9M
  }
440
  // 4x8 DCT of the other half of the block.
441
11.4M
  ComputeScaledDCT<4, 8>()(
442
11.4M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
11.4M
      block, scratch_space);
444
57.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
413M
    for (size_t ix = 0; ix < 8; ix++) {
446
367M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
367M
    }
448
45.9M
  }
449
11.4M
  float block00 = coefficients[0] * 0.25f;
450
11.4M
  float block01 = coefficients[1];
451
11.4M
  float block10 = coefficients[8];
452
11.4M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
11.4M
  coefficients[1] = (block00 - block01) * 0.5f;
454
11.4M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
11.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
165M
                                          float* JXL_RESTRICT scratch_space) {
462
165M
  using Type = AcStrategyType;
463
165M
  switch (strategy) {
464
12.8M
    case Type::IDENTITY: {
465
38.4M
      for (size_t y = 0; y < 2; y++) {
466
76.8M
        for (size_t x = 0; x < 2; x++) {
467
51.2M
          float block_dc = 0;
468
256M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.02G
            for (size_t ix = 0; ix < 4; ix++) {
470
820M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
820M
            }
472
205M
          }
473
51.2M
          block_dc *= 1.0f / 16;
474
256M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.02G
            for (size_t ix = 0; ix < 4; ix++) {
476
820M
              if (ix == 1 && iy == 1) continue;
477
768M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
768M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
768M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
768M
            }
481
205M
          }
482
51.2M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
51.2M
          coefficients[y * 8 + x] = block_dc;
484
51.2M
        }
485
25.6M
      }
486
12.8M
      float block00 = coefficients[0];
487
12.8M
      float block01 = coefficients[1];
488
12.8M
      float block10 = coefficients[8];
489
12.8M
      float block11 = coefficients[9];
490
12.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
12.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
12.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
12.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
12.8M
      break;
495
0
    }
496
11.7M
    case Type::DCT8X4: {
497
35.2M
      for (size_t x = 0; x < 2; x++) {
498
23.5M
        HWY_ALIGN float block[4 * 8];
499
23.5M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
23.5M
                                 scratch_space);
501
117M
        for (size_t iy = 0; iy < 4; iy++) {
502
847M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
753M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
753M
          }
506
94.1M
        }
507
23.5M
      }
508
11.7M
      float block0 = coefficients[0];
509
11.7M
      float block1 = coefficients[8];
510
11.7M
      coefficients[0] = (block0 + block1) * 0.5f;
511
11.7M
      coefficients[8] = (block0 - block1) * 0.5f;
512
11.7M
      break;
513
0
    }
514
11.6M
    case Type::DCT4X8: {
515
34.8M
      for (size_t y = 0; y < 2; y++) {
516
23.2M
        HWY_ALIGN float block[4 * 8];
517
23.2M
        ComputeScaledDCT<4, 8>()(
518
23.2M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
23.2M
            scratch_space);
520
116M
        for (size_t iy = 0; iy < 4; iy++) {
521
836M
          for (size_t ix = 0; ix < 8; ix++) {
522
743M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
743M
          }
524
92.9M
        }
525
23.2M
      }
526
11.6M
      float block0 = coefficients[0];
527
11.6M
      float block1 = coefficients[8];
528
11.6M
      coefficients[0] = (block0 + block1) * 0.5f;
529
11.6M
      coefficients[8] = (block0 - block1) * 0.5f;
530
11.6M
      break;
531
0
    }
532
11.4M
    case Type::DCT4X4: {
533
34.4M
      for (size_t y = 0; y < 2; y++) {
534
68.9M
        for (size_t x = 0; x < 2; x++) {
535
45.9M
          HWY_ALIGN float block[4 * 4];
536
45.9M
          ComputeScaledDCT<4, 4>()(
537
45.9M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
45.9M
              block, scratch_space);
539
229M
          for (size_t iy = 0; iy < 4; iy++) {
540
919M
            for (size_t ix = 0; ix < 4; ix++) {
541
735M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
735M
            }
543
183M
          }
544
45.9M
        }
545
22.9M
      }
546
11.4M
      float block00 = coefficients[0];
547
11.4M
      float block01 = coefficients[1];
548
11.4M
      float block10 = coefficients[8];
549
11.4M
      float block11 = coefficients[9];
550
11.4M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
11.4M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
11.4M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
11.4M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
11.4M
      break;
555
0
    }
556
15.2M
    case Type::DCT2X2: {
557
15.2M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
15.2M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
15.2M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
15.2M
      break;
561
0
    }
562
4.94M
    case Type::DCT16X16: {
563
4.94M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.94M
                                 scratch_space);
565
4.94M
      break;
566
0
    }
567
9.58M
    case Type::DCT16X8: {
568
9.58M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
9.58M
                                scratch_space);
570
9.58M
      break;
571
0
    }
572
9.59M
    case Type::DCT8X16: {
573
9.59M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
9.59M
                                scratch_space);
575
9.59M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.90M
    case Type::DCT32X16: {
588
1.90M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.90M
                                 scratch_space);
590
1.90M
      break;
591
0
    }
592
1.88M
    case Type::DCT16X32: {
593
1.88M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.88M
                                 scratch_space);
595
1.88M
      break;
596
0
    }
597
1.07M
    case Type::DCT32X32: {
598
1.07M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.07M
                                 scratch_space);
600
1.07M
      break;
601
0
    }
602
25.7M
    case Type::DCT: {
603
25.7M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
25.7M
                               scratch_space);
605
25.7M
      break;
606
0
    }
607
11.6M
    case Type::AFV0: {
608
11.6M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
11.6M
      break;
610
0
    }
611
11.6M
    case Type::AFV1: {
612
11.6M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
11.6M
      break;
614
0
    }
615
11.6M
    case Type::AFV2: {
616
11.6M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
11.6M
      break;
618
0
    }
619
11.6M
    case Type::AFV3: {
620
11.6M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
11.6M
      break;
622
0
    }
623
257k
    case Type::DCT64X64: {
624
257k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
257k
                                 scratch_space);
626
257k
      break;
627
0
    }
628
573k
    case Type::DCT64X32: {
629
573k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
573k
                                 scratch_space);
631
573k
      break;
632
0
    }
633
340k
    case Type::DCT32X64: {
634
340k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
340k
                                 scratch_space);
636
340k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
165M
  }
669
165M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
5.17M
                                          float* JXL_RESTRICT scratch_space) {
462
5.17M
  using Type = AcStrategyType;
463
5.17M
  switch (strategy) {
464
658k
    case Type::IDENTITY: {
465
1.97M
      for (size_t y = 0; y < 2; y++) {
466
3.95M
        for (size_t x = 0; x < 2; x++) {
467
2.63M
          float block_dc = 0;
468
13.1M
          for (size_t iy = 0; iy < 4; iy++) {
469
52.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
42.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
42.1M
            }
472
10.5M
          }
473
2.63M
          block_dc *= 1.0f / 16;
474
13.1M
          for (size_t iy = 0; iy < 4; iy++) {
475
52.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
42.1M
              if (ix == 1 && iy == 1) continue;
477
39.5M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
39.5M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
39.5M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
39.5M
            }
481
10.5M
          }
482
2.63M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.63M
          coefficients[y * 8 + x] = block_dc;
484
2.63M
        }
485
1.31M
      }
486
658k
      float block00 = coefficients[0];
487
658k
      float block01 = coefficients[1];
488
658k
      float block10 = coefficients[8];
489
658k
      float block11 = coefficients[9];
490
658k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
658k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
658k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
658k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
658k
      break;
495
0
    }
496
134k
    case Type::DCT8X4: {
497
403k
      for (size_t x = 0; x < 2; x++) {
498
268k
        HWY_ALIGN float block[4 * 8];
499
268k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
268k
                                 scratch_space);
501
1.34M
        for (size_t iy = 0; iy < 4; iy++) {
502
9.67M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
8.60M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
8.60M
          }
506
1.07M
        }
507
268k
      }
508
134k
      float block0 = coefficients[0];
509
134k
      float block1 = coefficients[8];
510
134k
      coefficients[0] = (block0 + block1) * 0.5f;
511
134k
      coefficients[8] = (block0 - block1) * 0.5f;
512
134k
      break;
513
0
    }
514
62.3k
    case Type::DCT4X8: {
515
187k
      for (size_t y = 0; y < 2; y++) {
516
124k
        HWY_ALIGN float block[4 * 8];
517
124k
        ComputeScaledDCT<4, 8>()(
518
124k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
124k
            scratch_space);
520
623k
        for (size_t iy = 0; iy < 4; iy++) {
521
4.49M
          for (size_t ix = 0; ix < 8; ix++) {
522
3.99M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
3.99M
          }
524
499k
        }
525
124k
      }
526
62.3k
      float block0 = coefficients[0];
527
62.3k
      float block1 = coefficients[8];
528
62.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
62.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
62.3k
      break;
531
0
    }
532
693
    case Type::DCT4X4: {
533
2.07k
      for (size_t y = 0; y < 2; y++) {
534
4.15k
        for (size_t x = 0; x < 2; x++) {
535
2.77k
          HWY_ALIGN float block[4 * 4];
536
2.77k
          ComputeScaledDCT<4, 4>()(
537
2.77k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
2.77k
              block, scratch_space);
539
13.8k
          for (size_t iy = 0; iy < 4; iy++) {
540
55.4k
            for (size_t ix = 0; ix < 4; ix++) {
541
44.3k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
44.3k
            }
543
11.0k
          }
544
2.77k
        }
545
1.38k
      }
546
693
      float block00 = coefficients[0];
547
693
      float block01 = coefficients[1];
548
693
      float block10 = coefficients[8];
549
693
      float block11 = coefficients[9];
550
693
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
693
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
693
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
693
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
693
      break;
555
0
    }
556
1.85M
    case Type::DCT2X2: {
557
1.85M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.85M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.85M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.85M
      break;
561
0
    }
562
147k
    case Type::DCT16X16: {
563
147k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
147k
                                 scratch_space);
565
147k
      break;
566
0
    }
567
208k
    case Type::DCT16X8: {
568
208k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
208k
                                scratch_space);
570
208k
      break;
571
0
    }
572
224k
    case Type::DCT8X16: {
573
224k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
224k
                                scratch_space);
575
224k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
45.6k
    case Type::DCT32X16: {
588
45.6k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
45.6k
                                 scratch_space);
590
45.6k
      break;
591
0
    }
592
44.5k
    case Type::DCT16X32: {
593
44.5k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
44.5k
                                 scratch_space);
595
44.5k
      break;
596
0
    }
597
78.6k
    case Type::DCT32X32: {
598
78.6k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
78.6k
                                 scratch_space);
600
78.6k
      break;
601
0
    }
602
1.37M
    case Type::DCT: {
603
1.37M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.37M
                               scratch_space);
605
1.37M
      break;
606
0
    }
607
95.2k
    case Type::AFV0: {
608
95.2k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
95.2k
      break;
610
0
    }
611
51.4k
    case Type::AFV1: {
612
51.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
51.4k
      break;
614
0
    }
615
65.1k
    case Type::AFV2: {
616
65.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
65.1k
      break;
618
0
    }
619
67.1k
    case Type::AFV3: {
620
67.1k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
67.1k
      break;
622
0
    }
623
49.1k
    case Type::DCT64X64: {
624
49.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
49.1k
                                 scratch_space);
626
49.1k
      break;
627
0
    }
628
12.0k
    case Type::DCT64X32: {
629
12.0k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.0k
                                 scratch_space);
631
12.0k
      break;
632
0
    }
633
5.11k
    case Type::DCT32X64: {
634
5.11k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
5.11k
                                 scratch_space);
636
5.11k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
5.17M
  }
669
5.17M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
16.6M
                                          float* JXL_RESTRICT scratch_space) {
462
16.6M
  using Type = AcStrategyType;
463
16.6M
  switch (strategy) {
464
658k
    case Type::IDENTITY: {
465
1.97M
      for (size_t y = 0; y < 2; y++) {
466
3.95M
        for (size_t x = 0; x < 2; x++) {
467
2.63M
          float block_dc = 0;
468
13.1M
          for (size_t iy = 0; iy < 4; iy++) {
469
52.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
42.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
42.1M
            }
472
10.5M
          }
473
2.63M
          block_dc *= 1.0f / 16;
474
13.1M
          for (size_t iy = 0; iy < 4; iy++) {
475
52.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
42.1M
              if (ix == 1 && iy == 1) continue;
477
39.5M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
39.5M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
39.5M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
39.5M
            }
481
10.5M
          }
482
2.63M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.63M
          coefficients[y * 8 + x] = block_dc;
484
2.63M
        }
485
1.31M
      }
486
658k
      float block00 = coefficients[0];
487
658k
      float block01 = coefficients[1];
488
658k
      float block10 = coefficients[8];
489
658k
      float block11 = coefficients[9];
490
658k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
658k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
658k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
658k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
658k
      break;
495
0
    }
496
134k
    case Type::DCT8X4: {
497
403k
      for (size_t x = 0; x < 2; x++) {
498
268k
        HWY_ALIGN float block[4 * 8];
499
268k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
268k
                                 scratch_space);
501
1.34M
        for (size_t iy = 0; iy < 4; iy++) {
502
9.67M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
8.60M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
8.60M
          }
506
1.07M
        }
507
268k
      }
508
134k
      float block0 = coefficients[0];
509
134k
      float block1 = coefficients[8];
510
134k
      coefficients[0] = (block0 + block1) * 0.5f;
511
134k
      coefficients[8] = (block0 - block1) * 0.5f;
512
134k
      break;
513
0
    }
514
62.3k
    case Type::DCT4X8: {
515
187k
      for (size_t y = 0; y < 2; y++) {
516
124k
        HWY_ALIGN float block[4 * 8];
517
124k
        ComputeScaledDCT<4, 8>()(
518
124k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
124k
            scratch_space);
520
623k
        for (size_t iy = 0; iy < 4; iy++) {
521
4.49M
          for (size_t ix = 0; ix < 8; ix++) {
522
3.99M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
3.99M
          }
524
499k
        }
525
124k
      }
526
62.3k
      float block0 = coefficients[0];
527
62.3k
      float block1 = coefficients[8];
528
62.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
62.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
62.3k
      break;
531
0
    }
532
693
    case Type::DCT4X4: {
533
2.07k
      for (size_t y = 0; y < 2; y++) {
534
4.15k
        for (size_t x = 0; x < 2; x++) {
535
2.77k
          HWY_ALIGN float block[4 * 4];
536
2.77k
          ComputeScaledDCT<4, 4>()(
537
2.77k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
2.77k
              block, scratch_space);
539
13.8k
          for (size_t iy = 0; iy < 4; iy++) {
540
55.4k
            for (size_t ix = 0; ix < 4; ix++) {
541
44.3k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
44.3k
            }
543
11.0k
          }
544
2.77k
        }
545
1.38k
      }
546
693
      float block00 = coefficients[0];
547
693
      float block01 = coefficients[1];
548
693
      float block10 = coefficients[8];
549
693
      float block11 = coefficients[9];
550
693
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
693
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
693
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
693
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
693
      break;
555
0
    }
556
1.85M
    case Type::DCT2X2: {
557
1.85M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.85M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.85M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.85M
      break;
561
0
    }
562
147k
    case Type::DCT16X16: {
563
147k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
147k
                                 scratch_space);
565
147k
      break;
566
0
    }
567
208k
    case Type::DCT16X8: {
568
208k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
208k
                                scratch_space);
570
208k
      break;
571
0
    }
572
224k
    case Type::DCT8X16: {
573
224k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
224k
                                scratch_space);
575
224k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
45.6k
    case Type::DCT32X16: {
588
45.6k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
45.6k
                                 scratch_space);
590
45.6k
      break;
591
0
    }
592
44.5k
    case Type::DCT16X32: {
593
44.5k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
44.5k
                                 scratch_space);
595
44.5k
      break;
596
0
    }
597
78.6k
    case Type::DCT32X32: {
598
78.6k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
78.6k
                                 scratch_space);
600
78.6k
      break;
601
0
    }
602
12.8M
    case Type::DCT: {
603
12.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
12.8M
                               scratch_space);
605
12.8M
      break;
606
0
    }
607
95.2k
    case Type::AFV0: {
608
95.2k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
95.2k
      break;
610
0
    }
611
51.4k
    case Type::AFV1: {
612
51.4k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
51.4k
      break;
614
0
    }
615
65.1k
    case Type::AFV2: {
616
65.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
65.1k
      break;
618
0
    }
619
67.1k
    case Type::AFV3: {
620
67.1k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
67.1k
      break;
622
0
    }
623
49.1k
    case Type::DCT64X64: {
624
49.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
49.1k
                                 scratch_space);
626
49.1k
      break;
627
0
    }
628
12.0k
    case Type::DCT64X32: {
629
12.0k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.0k
                                 scratch_space);
631
12.0k
      break;
632
0
    }
633
5.11k
    case Type::DCT32X64: {
634
5.11k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
5.11k
                                 scratch_space);
636
5.11k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
16.6M
  }
669
16.6M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
143M
                                          float* JXL_RESTRICT scratch_space) {
462
143M
  using Type = AcStrategyType;
463
143M
  switch (strategy) {
464
11.4M
    case Type::IDENTITY: {
465
34.4M
      for (size_t y = 0; y < 2; y++) {
466
68.9M
        for (size_t x = 0; x < 2; x++) {
467
45.9M
          float block_dc = 0;
468
229M
          for (size_t iy = 0; iy < 4; iy++) {
469
919M
            for (size_t ix = 0; ix < 4; ix++) {
470
735M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
735M
            }
472
183M
          }
473
45.9M
          block_dc *= 1.0f / 16;
474
229M
          for (size_t iy = 0; iy < 4; iy++) {
475
919M
            for (size_t ix = 0; ix < 4; ix++) {
476
735M
              if (ix == 1 && iy == 1) continue;
477
689M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
689M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
689M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
689M
            }
481
183M
          }
482
45.9M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
45.9M
          coefficients[y * 8 + x] = block_dc;
484
45.9M
        }
485
22.9M
      }
486
11.4M
      float block00 = coefficients[0];
487
11.4M
      float block01 = coefficients[1];
488
11.4M
      float block10 = coefficients[8];
489
11.4M
      float block11 = coefficients[9];
490
11.4M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
11.4M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
11.4M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
11.4M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
11.4M
      break;
495
0
    }
496
11.4M
    case Type::DCT8X4: {
497
34.4M
      for (size_t x = 0; x < 2; x++) {
498
22.9M
        HWY_ALIGN float block[4 * 8];
499
22.9M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
22.9M
                                 scratch_space);
501
114M
        for (size_t iy = 0; iy < 4; iy++) {
502
827M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
735M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
735M
          }
506
91.9M
        }
507
22.9M
      }
508
11.4M
      float block0 = coefficients[0];
509
11.4M
      float block1 = coefficients[8];
510
11.4M
      coefficients[0] = (block0 + block1) * 0.5f;
511
11.4M
      coefficients[8] = (block0 - block1) * 0.5f;
512
11.4M
      break;
513
0
    }
514
11.4M
    case Type::DCT4X8: {
515
34.4M
      for (size_t y = 0; y < 2; y++) {
516
22.9M
        HWY_ALIGN float block[4 * 8];
517
22.9M
        ComputeScaledDCT<4, 8>()(
518
22.9M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
22.9M
            scratch_space);
520
114M
        for (size_t iy = 0; iy < 4; iy++) {
521
827M
          for (size_t ix = 0; ix < 8; ix++) {
522
735M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
735M
          }
524
91.9M
        }
525
22.9M
      }
526
11.4M
      float block0 = coefficients[0];
527
11.4M
      float block1 = coefficients[8];
528
11.4M
      coefficients[0] = (block0 + block1) * 0.5f;
529
11.4M
      coefficients[8] = (block0 - block1) * 0.5f;
530
11.4M
      break;
531
0
    }
532
11.4M
    case Type::DCT4X4: {
533
34.4M
      for (size_t y = 0; y < 2; y++) {
534
68.9M
        for (size_t x = 0; x < 2; x++) {
535
45.9M
          HWY_ALIGN float block[4 * 4];
536
45.9M
          ComputeScaledDCT<4, 4>()(
537
45.9M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
45.9M
              block, scratch_space);
539
229M
          for (size_t iy = 0; iy < 4; iy++) {
540
919M
            for (size_t ix = 0; ix < 4; ix++) {
541
735M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
735M
            }
543
183M
          }
544
45.9M
        }
545
22.9M
      }
546
11.4M
      float block00 = coefficients[0];
547
11.4M
      float block01 = coefficients[1];
548
11.4M
      float block10 = coefficients[8];
549
11.4M
      float block11 = coefficients[9];
550
11.4M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
11.4M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
11.4M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
11.4M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
11.4M
      break;
555
0
    }
556
11.4M
    case Type::DCT2X2: {
557
11.4M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
11.4M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
11.4M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
11.4M
      break;
561
0
    }
562
4.64M
    case Type::DCT16X16: {
563
4.64M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.64M
                                 scratch_space);
565
4.64M
      break;
566
0
    }
567
9.16M
    case Type::DCT16X8: {
568
9.16M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
9.16M
                                scratch_space);
570
9.16M
      break;
571
0
    }
572
9.14M
    case Type::DCT8X16: {
573
9.14M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
9.14M
                                scratch_space);
575
9.14M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.81M
    case Type::DCT32X16: {
588
1.81M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.81M
                                 scratch_space);
590
1.81M
      break;
591
0
    }
592
1.79M
    case Type::DCT16X32: {
593
1.79M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.79M
                                 scratch_space);
595
1.79M
      break;
596
0
    }
597
919k
    case Type::DCT32X32: {
598
919k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
919k
                                 scratch_space);
600
919k
      break;
601
0
    }
602
11.4M
    case Type::DCT: {
603
11.4M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
11.4M
                               scratch_space);
605
11.4M
      break;
606
0
    }
607
11.4M
    case Type::AFV0: {
608
11.4M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
11.4M
      break;
610
0
    }
611
11.4M
    case Type::AFV1: {
612
11.4M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
11.4M
      break;
614
0
    }
615
11.4M
    case Type::AFV2: {
616
11.4M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
11.4M
      break;
618
0
    }
619
11.4M
    case Type::AFV3: {
620
11.4M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
11.4M
      break;
622
0
    }
623
159k
    case Type::DCT64X64: {
624
159k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
159k
                                 scratch_space);
626
159k
      break;
627
0
    }
628
548k
    case Type::DCT64X32: {
629
548k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
548k
                                 scratch_space);
631
548k
      break;
632
0
    }
633
329k
    case Type::DCT32X64: {
634
329k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
329k
                                 scratch_space);
636
329k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
143M
  }
669
143M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
21.8M
                                              float* scratch_space) {
676
21.8M
  using Type = AcStrategyType;
677
21.8M
  switch (strategy) {
678
417k
    case Type::DCT16X8: {
679
417k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
417k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
417k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
417k
      break;
683
0
    }
684
449k
    case Type::DCT8X16: {
685
449k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
449k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
449k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
449k
      break;
689
0
    }
690
295k
    case Type::DCT16X16: {
691
295k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
295k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
295k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
295k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
91.2k
    case Type::DCT32X16: {
709
91.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
91.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
91.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
91.2k
      break;
713
0
    }
714
89.0k
    case Type::DCT16X32: {
715
89.0k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
89.0k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
89.0k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
89.0k
      break;
719
0
    }
720
157k
    case Type::DCT32X32: {
721
157k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
157k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
157k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
157k
      break;
725
0
    }
726
24.1k
    case Type::DCT64X32: {
727
24.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
24.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
24.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
24.1k
      break;
731
0
    }
732
10.2k
    case Type::DCT32X64: {
733
10.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.2k
      break;
737
0
    }
738
98.3k
    case Type::DCT64X64: {
739
98.3k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
98.3k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
98.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
98.3k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
14.2M
    case Type::DCT:
787
17.9M
    case Type::DCT2X2:
788
17.9M
    case Type::DCT4X4:
789
18.0M
    case Type::DCT4X8:
790
18.3M
    case Type::DCT8X4:
791
18.5M
    case Type::AFV0:
792
18.6M
    case Type::AFV1:
793
18.7M
    case Type::AFV2:
794
18.9M
    case Type::AFV3:
795
20.2M
    case Type::IDENTITY:
796
20.2M
      dc[0] = block[0];
797
20.2M
      break;
798
21.8M
  }
799
21.8M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
5.17M
                                              float* scratch_space) {
676
5.17M
  using Type = AcStrategyType;
677
5.17M
  switch (strategy) {
678
208k
    case Type::DCT16X8: {
679
208k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
208k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
208k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
208k
      break;
683
0
    }
684
224k
    case Type::DCT8X16: {
685
224k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
224k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
224k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
224k
      break;
689
0
    }
690
147k
    case Type::DCT16X16: {
691
147k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
147k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
147k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
147k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
45.6k
    case Type::DCT32X16: {
709
45.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
45.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
45.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
45.6k
      break;
713
0
    }
714
44.5k
    case Type::DCT16X32: {
715
44.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
44.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
44.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
44.5k
      break;
719
0
    }
720
78.6k
    case Type::DCT32X32: {
721
78.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
78.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
78.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
78.6k
      break;
725
0
    }
726
12.0k
    case Type::DCT64X32: {
727
12.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.0k
      break;
731
0
    }
732
5.11k
    case Type::DCT32X64: {
733
5.11k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
5.11k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
5.11k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
5.11k
      break;
737
0
    }
738
49.1k
    case Type::DCT64X64: {
739
49.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
49.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
49.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
49.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.37M
    case Type::DCT:
787
3.22M
    case Type::DCT2X2:
788
3.22M
    case Type::DCT4X4:
789
3.29M
    case Type::DCT4X8:
790
3.42M
    case Type::DCT8X4:
791
3.51M
    case Type::AFV0:
792
3.57M
    case Type::AFV1:
793
3.63M
    case Type::AFV2:
794
3.70M
    case Type::AFV3:
795
4.36M
    case Type::IDENTITY:
796
4.36M
      dc[0] = block[0];
797
4.36M
      break;
798
5.17M
  }
799
5.17M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
16.6M
                                              float* scratch_space) {
676
16.6M
  using Type = AcStrategyType;
677
16.6M
  switch (strategy) {
678
208k
    case Type::DCT16X8: {
679
208k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
208k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
208k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
208k
      break;
683
0
    }
684
224k
    case Type::DCT8X16: {
685
224k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
224k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
224k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
224k
      break;
689
0
    }
690
147k
    case Type::DCT16X16: {
691
147k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
147k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
147k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
147k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
45.6k
    case Type::DCT32X16: {
709
45.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
45.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
45.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
45.6k
      break;
713
0
    }
714
44.5k
    case Type::DCT16X32: {
715
44.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
44.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
44.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
44.5k
      break;
719
0
    }
720
78.6k
    case Type::DCT32X32: {
721
78.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
78.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
78.6k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
78.6k
      break;
725
0
    }
726
12.0k
    case Type::DCT64X32: {
727
12.0k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.0k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.0k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.0k
      break;
731
0
    }
732
5.11k
    case Type::DCT32X64: {
733
5.11k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
5.11k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
5.11k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
5.11k
      break;
737
0
    }
738
49.1k
    case Type::DCT64X64: {
739
49.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
49.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
49.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
49.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
12.8M
    case Type::DCT:
787
14.7M
    case Type::DCT2X2:
788
14.7M
    case Type::DCT4X4:
789
14.7M
    case Type::DCT4X8:
790
14.9M
    case Type::DCT8X4:
791
15.0M
    case Type::AFV0:
792
15.0M
    case Type::AFV1:
793
15.1M
    case Type::AFV2:
794
15.2M
    case Type::AFV3:
795
15.8M
    case Type::IDENTITY:
796
15.8M
      dc[0] = block[0];
797
15.8M
      break;
798
16.6M
  }
799
16.6M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_