Coverage Report

Created: 2026-06-07 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.02M
                                   const size_t output_stride, float* scratch) {
40
2.02M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.02M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.02M
  float* block = scratch;
43
2.02M
  if (ROWS < COLS) {
44
1.49M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.30M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.47M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.47M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.47M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.47M
      }
50
830k
    }
51
1.36M
  } else {
52
4.84M
    for (size_t y = 0; y < LF_COLS; y++) {
53
19.3M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
15.9M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
15.9M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
15.9M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
15.9M
      }
58
3.47M
    }
59
1.36M
  }
60
61
2.02M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.02M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.02M
                                  scratch_space);
64
2.02M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
250k
                                   const size_t output_stride, float* scratch) {
40
250k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
250k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
250k
  float* block = scratch;
43
250k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
250k
  } else {
52
501k
    for (size_t y = 0; y < LF_COLS; y++) {
53
752k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
501k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
501k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
501k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
501k
      }
58
250k
    }
59
250k
  }
60
61
250k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
250k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
250k
                                  scratch_space);
64
250k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
265k
                                   const size_t output_stride, float* scratch) {
40
265k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
265k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
265k
  float* block = scratch;
43
265k
  if (ROWS < COLS) {
44
531k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
796k
      for (size_t x = 0; x < LF_COLS; x++) {
46
531k
        block[y * COLS + x] = input[y * input_stride + x] *
47
531k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
531k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
531k
      }
50
265k
    }
51
265k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
265k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
265k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
265k
                                  scratch_space);
64
265k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
184k
                                   const size_t output_stride, float* scratch) {
40
184k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
184k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
184k
  float* block = scratch;
43
184k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
184k
  } else {
52
554k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.10M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
739k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
739k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
739k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
739k
      }
58
369k
    }
59
184k
  }
60
61
184k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
184k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
184k
                                  scratch_space);
64
184k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
58.8k
                                   const size_t output_stride, float* scratch) {
40
58.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
58.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
58.8k
  float* block = scratch;
43
58.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
58.8k
  } else {
52
176k
    for (size_t y = 0; y < LF_COLS; y++) {
53
588k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
470k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
470k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
470k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
470k
      }
58
117k
    }
59
58.8k
  }
60
61
58.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
58.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
58.8k
                                  scratch_space);
64
58.8k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
61.2k
                                   const size_t output_stride, float* scratch) {
40
61.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
61.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
61.2k
  float* block = scratch;
43
61.2k
  if (ROWS < COLS) {
44
183k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
612k
      for (size_t x = 0; x < LF_COLS; x++) {
46
490k
        block[y * COLS + x] = input[y * input_stride + x] *
47
490k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
490k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
490k
      }
50
122k
    }
51
61.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
61.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
61.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
61.2k
                                  scratch_space);
64
61.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
110k
                                   const size_t output_stride, float* scratch) {
40
110k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
110k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
110k
  float* block = scratch;
43
110k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
110k
  } else {
52
551k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.20M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.76M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.76M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.76M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.76M
      }
58
441k
    }
59
110k
  }
60
61
110k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
110k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
110k
                                  scratch_space);
64
110k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.7k
                                   const size_t output_stride, float* scratch) {
40
12.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.7k
  float* block = scratch;
43
12.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.7k
  } else {
52
63.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
459k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
408k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
408k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
408k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
408k
      }
58
51.1k
    }
59
12.7k
  }
60
61
12.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.7k
                                  scratch_space);
64
12.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.81k
                                   const size_t output_stride, float* scratch) {
40
6.81k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.81k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.81k
  float* block = scratch;
43
6.81k
  if (ROWS < COLS) {
44
34.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
245k
      for (size_t x = 0; x < LF_COLS; x++) {
46
218k
        block[y * COLS + x] = input[y * input_stride + x] *
47
218k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
218k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
218k
      }
50
27.2k
    }
51
6.81k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.81k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.81k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.81k
                                  scratch_space);
64
6.81k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
63.6k
                                   const size_t output_stride, float* scratch) {
40
63.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
63.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
63.6k
  float* block = scratch;
43
63.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
63.6k
  } else {
52
572k
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.58M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
4.07M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
4.07M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
4.07M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
4.07M
      }
58
509k
    }
59
63.6k
  }
60
61
63.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
63.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
63.6k
                                  scratch_space);
64
63.6k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
250k
                                   const size_t output_stride, float* scratch) {
40
250k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
250k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
250k
  float* block = scratch;
43
250k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
250k
  } else {
52
501k
    for (size_t y = 0; y < LF_COLS; y++) {
53
752k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
501k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
501k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
501k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
501k
      }
58
250k
    }
59
250k
  }
60
61
250k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
250k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
250k
                                  scratch_space);
64
250k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
265k
                                   const size_t output_stride, float* scratch) {
40
265k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
265k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
265k
  float* block = scratch;
43
265k
  if (ROWS < COLS) {
44
531k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
796k
      for (size_t x = 0; x < LF_COLS; x++) {
46
531k
        block[y * COLS + x] = input[y * input_stride + x] *
47
531k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
531k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
531k
      }
50
265k
    }
51
265k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
265k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
265k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
265k
                                  scratch_space);
64
265k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
184k
                                   const size_t output_stride, float* scratch) {
40
184k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
184k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
184k
  float* block = scratch;
43
184k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
184k
  } else {
52
554k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.10M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
739k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
739k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
739k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
739k
      }
58
369k
    }
59
184k
  }
60
61
184k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
184k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
184k
                                  scratch_space);
64
184k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
58.8k
                                   const size_t output_stride, float* scratch) {
40
58.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
58.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
58.8k
  float* block = scratch;
43
58.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
58.8k
  } else {
52
176k
    for (size_t y = 0; y < LF_COLS; y++) {
53
588k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
470k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
470k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
470k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
470k
      }
58
117k
    }
59
58.8k
  }
60
61
58.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
58.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
58.8k
                                  scratch_space);
64
58.8k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
61.2k
                                   const size_t output_stride, float* scratch) {
40
61.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
61.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
61.2k
  float* block = scratch;
43
61.2k
  if (ROWS < COLS) {
44
183k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
612k
      for (size_t x = 0; x < LF_COLS; x++) {
46
490k
        block[y * COLS + x] = input[y * input_stride + x] *
47
490k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
490k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
490k
      }
50
122k
    }
51
61.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
61.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
61.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
61.2k
                                  scratch_space);
64
61.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
110k
                                   const size_t output_stride, float* scratch) {
40
110k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
110k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
110k
  float* block = scratch;
43
110k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
110k
  } else {
52
551k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.20M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.76M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.76M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.76M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.76M
      }
58
441k
    }
59
110k
  }
60
61
110k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
110k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
110k
                                  scratch_space);
64
110k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
12.7k
                                   const size_t output_stride, float* scratch) {
40
12.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
12.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
12.7k
  float* block = scratch;
43
12.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
12.7k
  } else {
52
63.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
459k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
408k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
408k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
408k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
408k
      }
58
51.1k
    }
59
12.7k
  }
60
61
12.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
12.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
12.7k
                                  scratch_space);
64
12.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.81k
                                   const size_t output_stride, float* scratch) {
40
6.81k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.81k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.81k
  float* block = scratch;
43
6.81k
  if (ROWS < COLS) {
44
34.0k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
245k
      for (size_t x = 0; x < LF_COLS; x++) {
46
218k
        block[y * COLS + x] = input[y * input_stride + x] *
47
218k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
218k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
218k
      }
50
27.2k
    }
51
6.81k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
6.81k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.81k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.81k
                                  scratch_space);
64
6.81k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
63.6k
                                   const size_t output_stride, float* scratch) {
40
63.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
63.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
63.6k
  float* block = scratch;
43
63.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
63.6k
  } else {
52
572k
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.58M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
4.07M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
4.07M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
4.07M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
4.07M
      }
58
509k
    }
59
63.6k
  }
60
61
63.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
63.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
63.6k
                                  scratch_space);
64
63.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
57.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
57.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
57.4M
  static_assert(S % 2 == 0, "S should be even");
70
57.4M
  float temp[kDCTBlockSize];
71
57.4M
  constexpr size_t num_2x2 = S / 2;
72
191M
  for (size_t y = 0; y < num_2x2; y++) {
73
536M
    for (size_t x = 0; x < num_2x2; x++) {
74
402M
      float c00 = block[y * 2 * stride + x * 2];
75
402M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
402M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
402M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
402M
      float r00 = c00 + c01 + c10 + c11;
79
402M
      float r01 = c00 + c01 - c10 - c11;
80
402M
      float r10 = c00 - c01 + c10 - c11;
81
402M
      float r11 = c00 - c01 - c10 + c11;
82
402M
      r00 *= 0.25f;
83
402M
      r01 *= 0.25f;
84
402M
      r10 *= 0.25f;
85
402M
      r11 *= 0.25f;
86
402M
      temp[y * kBlockDim + x] = r00;
87
402M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
402M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
402M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
402M
    }
91
134M
  }
92
325M
  for (size_t y = 0; y < S; y++) {
93
1.87G
    for (size_t x = 0; x < S; x++) {
94
1.60G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.60G
    }
96
268M
  }
97
57.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
11.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
45.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
36.3M
      float c00 = block[y * 2 * stride + x * 2];
75
36.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
36.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
36.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
36.3M
      float r00 = c00 + c01 + c10 + c11;
79
36.3M
      float r01 = c00 + c01 - c10 - c11;
80
36.3M
      float r10 = c00 - c01 + c10 - c11;
81
36.3M
      float r11 = c00 - c01 - c10 + c11;
82
36.3M
      r00 *= 0.25f;
83
36.3M
      r01 *= 0.25f;
84
36.3M
      r10 *= 0.25f;
85
36.3M
      r11 *= 0.25f;
86
36.3M
      temp[y * kBlockDim + x] = r00;
87
36.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
36.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
36.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
36.3M
    }
91
9.09M
  }
92
20.4M
  for (size_t y = 0; y < S; y++) {
93
163M
    for (size_t x = 0; x < S; x++) {
94
145M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
145M
    }
96
18.1M
  }
97
2.27M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
6.82M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.09M
      float c00 = block[y * 2 * stride + x * 2];
75
9.09M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
9.09M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
9.09M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
9.09M
      float r00 = c00 + c01 + c10 + c11;
79
9.09M
      float r01 = c00 + c01 - c10 - c11;
80
9.09M
      float r10 = c00 - c01 + c10 - c11;
81
9.09M
      float r11 = c00 - c01 - c10 + c11;
82
9.09M
      r00 *= 0.25f;
83
9.09M
      r01 *= 0.25f;
84
9.09M
      r10 *= 0.25f;
85
9.09M
      r11 *= 0.25f;
86
9.09M
      temp[y * kBlockDim + x] = r00;
87
9.09M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
9.09M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
9.09M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
9.09M
    }
91
4.54M
  }
92
11.3M
  for (size_t y = 0; y < S; y++) {
93
45.4M
    for (size_t x = 0; x < S; x++) {
94
36.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
36.3M
    }
96
9.09M
  }
97
2.27M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
4.54M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.54M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.27M
      float c00 = block[y * 2 * stride + x * 2];
75
2.27M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.27M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.27M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.27M
      float r00 = c00 + c01 + c10 + c11;
79
2.27M
      float r01 = c00 + c01 - c10 - c11;
80
2.27M
      float r10 = c00 - c01 + c10 - c11;
81
2.27M
      float r11 = c00 - c01 - c10 + c11;
82
2.27M
      r00 *= 0.25f;
83
2.27M
      r01 *= 0.25f;
84
2.27M
      r10 *= 0.25f;
85
2.27M
      r11 *= 0.25f;
86
2.27M
      temp[y * kBlockDim + x] = r00;
87
2.27M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.27M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.27M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.27M
    }
91
2.27M
  }
92
6.82M
  for (size_t y = 0; y < S; y++) {
93
13.6M
    for (size_t x = 0; x < S; x++) {
94
9.09M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
9.09M
    }
96
4.54M
  }
97
2.27M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
11.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
45.4M
    for (size_t x = 0; x < num_2x2; x++) {
74
36.3M
      float c00 = block[y * 2 * stride + x * 2];
75
36.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
36.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
36.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
36.3M
      float r00 = c00 + c01 + c10 + c11;
79
36.3M
      float r01 = c00 + c01 - c10 - c11;
80
36.3M
      float r10 = c00 - c01 + c10 - c11;
81
36.3M
      float r11 = c00 - c01 - c10 + c11;
82
36.3M
      r00 *= 0.25f;
83
36.3M
      r01 *= 0.25f;
84
36.3M
      r10 *= 0.25f;
85
36.3M
      r11 *= 0.25f;
86
36.3M
      temp[y * kBlockDim + x] = r00;
87
36.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
36.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
36.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
36.3M
    }
91
9.09M
  }
92
20.4M
  for (size_t y = 0; y < S; y++) {
93
163M
    for (size_t x = 0; x < S; x++) {
94
145M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
145M
    }
96
18.1M
  }
97
2.27M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
6.82M
  for (size_t y = 0; y < num_2x2; y++) {
73
13.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.09M
      float c00 = block[y * 2 * stride + x * 2];
75
9.09M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
9.09M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
9.09M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
9.09M
      float r00 = c00 + c01 + c10 + c11;
79
9.09M
      float r01 = c00 + c01 - c10 - c11;
80
9.09M
      float r10 = c00 - c01 + c10 - c11;
81
9.09M
      float r11 = c00 - c01 - c10 + c11;
82
9.09M
      r00 *= 0.25f;
83
9.09M
      r01 *= 0.25f;
84
9.09M
      r10 *= 0.25f;
85
9.09M
      r11 *= 0.25f;
86
9.09M
      temp[y * kBlockDim + x] = r00;
87
9.09M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
9.09M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
9.09M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
9.09M
    }
91
4.54M
  }
92
11.3M
  for (size_t y = 0; y < S; y++) {
93
45.4M
    for (size_t x = 0; x < S; x++) {
94
36.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
36.3M
    }
96
9.09M
  }
97
2.27M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
2.27M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
2.27M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
2.27M
  static_assert(S % 2 == 0, "S should be even");
70
2.27M
  float temp[kDCTBlockSize];
71
2.27M
  constexpr size_t num_2x2 = S / 2;
72
4.54M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.54M
    for (size_t x = 0; x < num_2x2; x++) {
74
2.27M
      float c00 = block[y * 2 * stride + x * 2];
75
2.27M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
2.27M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
2.27M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
2.27M
      float r00 = c00 + c01 + c10 + c11;
79
2.27M
      float r01 = c00 + c01 - c10 - c11;
80
2.27M
      float r10 = c00 - c01 + c10 - c11;
81
2.27M
      float r11 = c00 - c01 - c10 + c11;
82
2.27M
      r00 *= 0.25f;
83
2.27M
      r01 *= 0.25f;
84
2.27M
      r10 *= 0.25f;
85
2.27M
      r11 *= 0.25f;
86
2.27M
      temp[y * kBlockDim + x] = r00;
87
2.27M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
2.27M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
2.27M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
2.27M
    }
91
2.27M
  }
92
6.82M
  for (size_t y = 0; y < S; y++) {
93
13.6M
    for (size_t x = 0; x < S; x++) {
94
9.09M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
9.09M
    }
96
4.54M
  }
97
2.27M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
73.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
292M
    for (size_t x = 0; x < num_2x2; x++) {
74
233M
      float c00 = block[y * 2 * stride + x * 2];
75
233M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
233M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
233M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
233M
      float r00 = c00 + c01 + c10 + c11;
79
233M
      float r01 = c00 + c01 - c10 - c11;
80
233M
      float r10 = c00 - c01 + c10 - c11;
81
233M
      float r11 = c00 - c01 - c10 + c11;
82
233M
      r00 *= 0.25f;
83
233M
      r01 *= 0.25f;
84
233M
      r10 *= 0.25f;
85
233M
      r11 *= 0.25f;
86
233M
      temp[y * kBlockDim + x] = r00;
87
233M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
233M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
233M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
233M
    }
91
58.4M
  }
92
131M
  for (size_t y = 0; y < S; y++) {
93
1.05G
    for (size_t x = 0; x < S; x++) {
94
934M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
934M
    }
96
116M
  }
97
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
43.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
87.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
58.4M
      float c00 = block[y * 2 * stride + x * 2];
75
58.4M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
58.4M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
58.4M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
58.4M
      float r00 = c00 + c01 + c10 + c11;
79
58.4M
      float r01 = c00 + c01 - c10 - c11;
80
58.4M
      float r10 = c00 - c01 + c10 - c11;
81
58.4M
      float r11 = c00 - c01 - c10 + c11;
82
58.4M
      r00 *= 0.25f;
83
58.4M
      r01 *= 0.25f;
84
58.4M
      r10 *= 0.25f;
85
58.4M
      r11 *= 0.25f;
86
58.4M
      temp[y * kBlockDim + x] = r00;
87
58.4M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
58.4M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
58.4M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
58.4M
    }
91
29.2M
  }
92
73.0M
  for (size_t y = 0; y < S; y++) {
93
292M
    for (size_t x = 0; x < S; x++) {
94
233M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
233M
    }
96
58.4M
  }
97
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.6M
  static_assert(S % 2 == 0, "S should be even");
70
14.6M
  float temp[kDCTBlockSize];
71
14.6M
  constexpr size_t num_2x2 = S / 2;
72
29.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
29.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.6M
      float c00 = block[y * 2 * stride + x * 2];
75
14.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
14.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
14.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
14.6M
      float r00 = c00 + c01 + c10 + c11;
79
14.6M
      float r01 = c00 + c01 - c10 - c11;
80
14.6M
      float r10 = c00 - c01 + c10 - c11;
81
14.6M
      float r11 = c00 - c01 - c10 + c11;
82
14.6M
      r00 *= 0.25f;
83
14.6M
      r01 *= 0.25f;
84
14.6M
      r10 *= 0.25f;
85
14.6M
      r11 *= 0.25f;
86
14.6M
      temp[y * kBlockDim + x] = r00;
87
14.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
14.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
14.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
14.6M
    }
91
14.6M
  }
92
43.8M
  for (size_t y = 0; y < S; y++) {
93
87.6M
    for (size_t x = 0; x < S; x++) {
94
58.4M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
58.4M
    }
96
29.2M
  }
97
14.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
59.0M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
59.0M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
59.0M
      {
102
59.0M
          0.2500000000000000,
103
59.0M
          0.8769029297991420f,
104
59.0M
          0.0000000000000000,
105
59.0M
          0.0000000000000000,
106
59.0M
          0.0000000000000000,
107
59.0M
          -0.4105377591765233f,
108
59.0M
          0.0000000000000000,
109
59.0M
          0.0000000000000000,
110
59.0M
          0.0000000000000000,
111
59.0M
          0.0000000000000000,
112
59.0M
          0.0000000000000000,
113
59.0M
          0.0000000000000000,
114
59.0M
          0.0000000000000000,
115
59.0M
          0.0000000000000000,
116
59.0M
          0.0000000000000000,
117
59.0M
          0.0000000000000000,
118
59.0M
      },
119
59.0M
      {
120
59.0M
          0.2500000000000000,
121
59.0M
          0.2206518106944235f,
122
59.0M
          0.0000000000000000,
123
59.0M
          0.0000000000000000,
124
59.0M
          -0.7071067811865474f,
125
59.0M
          0.6235485373547691f,
126
59.0M
          0.0000000000000000,
127
59.0M
          0.0000000000000000,
128
59.0M
          0.0000000000000000,
129
59.0M
          0.0000000000000000,
130
59.0M
          0.0000000000000000,
131
59.0M
          0.0000000000000000,
132
59.0M
          0.0000000000000000,
133
59.0M
          0.0000000000000000,
134
59.0M
          0.0000000000000000,
135
59.0M
          0.0000000000000000,
136
59.0M
      },
137
59.0M
      {
138
59.0M
          0.2500000000000000,
139
59.0M
          -0.1014005039375376f,
140
59.0M
          0.4067007583026075f,
141
59.0M
          -0.2125574805828875f,
142
59.0M
          0.0000000000000000,
143
59.0M
          -0.0643507165794627f,
144
59.0M
          -0.4517556589999482f,
145
59.0M
          -0.3046847507248690f,
146
59.0M
          0.3017929516615495f,
147
59.0M
          0.4082482904638627f,
148
59.0M
          0.1747866975480809f,
149
59.0M
          -0.2110560104933578f,
150
59.0M
          -0.1426608480880726f,
151
59.0M
          -0.1381354035075859f,
152
59.0M
          -0.1743760259965107f,
153
59.0M
          0.1135498731499434f,
154
59.0M
      },
155
59.0M
      {
156
59.0M
          0.2500000000000000,
157
59.0M
          -0.1014005039375375f,
158
59.0M
          0.4444481661973445f,
159
59.0M
          0.3085497062849767f,
160
59.0M
          0.0000000000000000f,
161
59.0M
          -0.0643507165794627f,
162
59.0M
          0.1585450355184006f,
163
59.0M
          0.5112616136591823f,
164
59.0M
          0.2579236279634118f,
165
59.0M
          0.0000000000000000,
166
59.0M
          0.0812611176717539f,
167
59.0M
          0.1856718091610980f,
168
59.0M
          -0.3416446842253372f,
169
59.0M
          0.3302282550303788f,
170
59.0M
          0.0702790691196284f,
171
59.0M
          -0.0741750459581035f,
172
59.0M
      },
173
59.0M
      {
174
59.0M
          0.2500000000000000,
175
59.0M
          0.2206518106944236f,
176
59.0M
          0.0000000000000000,
177
59.0M
          0.0000000000000000,
178
59.0M
          0.7071067811865476f,
179
59.0M
          0.6235485373547694f,
180
59.0M
          0.0000000000000000,
181
59.0M
          0.0000000000000000,
182
59.0M
          0.0000000000000000,
183
59.0M
          0.0000000000000000,
184
59.0M
          0.0000000000000000,
185
59.0M
          0.0000000000000000,
186
59.0M
          0.0000000000000000,
187
59.0M
          0.0000000000000000,
188
59.0M
          0.0000000000000000,
189
59.0M
          0.0000000000000000,
190
59.0M
      },
191
59.0M
      {
192
59.0M
          0.2500000000000000,
193
59.0M
          -0.1014005039375378f,
194
59.0M
          0.0000000000000000,
195
59.0M
          0.4706702258572536f,
196
59.0M
          0.0000000000000000,
197
59.0M
          -0.0643507165794628f,
198
59.0M
          -0.0403851516082220f,
199
59.0M
          0.0000000000000000,
200
59.0M
          0.1627234014286620f,
201
59.0M
          0.0000000000000000,
202
59.0M
          0.0000000000000000,
203
59.0M
          0.0000000000000000,
204
59.0M
          0.7367497537172237f,
205
59.0M
          0.0875511500058708f,
206
59.0M
          -0.2921026642334881f,
207
59.0M
          0.1940289303259434f,
208
59.0M
      },
209
59.0M
      {
210
59.0M
          0.2500000000000000,
211
59.0M
          -0.1014005039375377f,
212
59.0M
          0.1957439937204294f,
213
59.0M
          -0.1621205195722993f,
214
59.0M
          0.0000000000000000,
215
59.0M
          -0.0643507165794628f,
216
59.0M
          0.0074182263792424f,
217
59.0M
          -0.2904801297289980f,
218
59.0M
          0.0952002265347504f,
219
59.0M
          0.0000000000000000,
220
59.0M
          -0.3675398009862027f,
221
59.0M
          0.4921585901373873f,
222
59.0M
          0.2462710772207515f,
223
59.0M
          -0.0794670660590957f,
224
59.0M
          0.3623817333531167f,
225
59.0M
          -0.4351904965232280f,
226
59.0M
      },
227
59.0M
      {
228
59.0M
          0.2500000000000000,
229
59.0M
          -0.1014005039375376f,
230
59.0M
          0.2929100136981264f,
231
59.0M
          0.0000000000000000,
232
59.0M
          0.0000000000000000,
233
59.0M
          -0.0643507165794627f,
234
59.0M
          0.3935103426921017f,
235
59.0M
          -0.0657870154914280f,
236
59.0M
          0.0000000000000000,
237
59.0M
          -0.4082482904638628f,
238
59.0M
          -0.3078822139579090f,
239
59.0M
          -0.3852501370925192f,
240
59.0M
          -0.0857401903551931f,
241
59.0M
          -0.4613374887461511f,
242
59.0M
          0.0000000000000000,
243
59.0M
          0.2191868483885747f,
244
59.0M
      },
245
59.0M
      {
246
59.0M
          0.2500000000000000,
247
59.0M
          -0.1014005039375376f,
248
59.0M
          -0.4067007583026072f,
249
59.0M
          -0.2125574805828705f,
250
59.0M
          0.0000000000000000,
251
59.0M
          -0.0643507165794627f,
252
59.0M
          -0.4517556589999464f,
253
59.0M
          0.3046847507248840f,
254
59.0M
          0.3017929516615503f,
255
59.0M
          -0.4082482904638635f,
256
59.0M
          -0.1747866975480813f,
257
59.0M
          0.2110560104933581f,
258
59.0M
          -0.1426608480880734f,
259
59.0M
          -0.1381354035075829f,
260
59.0M
          -0.1743760259965108f,
261
59.0M
          0.1135498731499426f,
262
59.0M
      },
263
59.0M
      {
264
59.0M
          0.2500000000000000,
265
59.0M
          -0.1014005039375377f,
266
59.0M
          -0.1957439937204287f,
267
59.0M
          -0.1621205195722833f,
268
59.0M
          0.0000000000000000,
269
59.0M
          -0.0643507165794628f,
270
59.0M
          0.0074182263792444f,
271
59.0M
          0.2904801297290076f,
272
59.0M
          0.0952002265347505f,
273
59.0M
          0.0000000000000000,
274
59.0M
          0.3675398009862011f,
275
59.0M
          -0.4921585901373891f,
276
59.0M
          0.2462710772207514f,
277
59.0M
          -0.0794670660591026f,
278
59.0M
          0.3623817333531165f,
279
59.0M
          -0.4351904965232251f,
280
59.0M
      },
281
59.0M
      {
282
59.0M
          0.2500000000000000,
283
59.0M
          -0.1014005039375375f,
284
59.0M
          0.0000000000000000,
285
59.0M
          -0.4706702258572528f,
286
59.0M
          0.0000000000000000,
287
59.0M
          -0.0643507165794627f,
288
59.0M
          0.1107416575309343f,
289
59.0M
          0.0000000000000000,
290
59.0M
          -0.1627234014286617f,
291
59.0M
          0.0000000000000000,
292
59.0M
          0.0000000000000000,
293
59.0M
          0.0000000000000000,
294
59.0M
          0.1488339922711357f,
295
59.0M
          0.4972464710953509f,
296
59.0M
          0.2921026642334879f,
297
59.0M
          0.5550443808910661f,
298
59.0M
      },
299
59.0M
      {
300
59.0M
          0.2500000000000000,
301
59.0M
          -0.1014005039375377f,
302
59.0M
          0.1137907446044809f,
303
59.0M
          -0.1464291867126764f,
304
59.0M
          0.0000000000000000,
305
59.0M
          -0.0643507165794628f,
306
59.0M
          0.0829816309488205f,
307
59.0M
          -0.2388977352334460f,
308
59.0M
          -0.3531238544981630f,
309
59.0M
          -0.4082482904638630f,
310
59.0M
          0.4826689115059883f,
311
59.0M
          0.1741941265991622f,
312
59.0M
          -0.0476868035022925f,
313
59.0M
          0.1253805944856366f,
314
59.0M
          -0.4326608024727445f,
315
59.0M
          -0.2546827712406646f,
316
59.0M
      },
317
59.0M
      {
318
59.0M
          0.2500000000000000,
319
59.0M
          -0.1014005039375377f,
320
59.0M
          -0.4444481661973438f,
321
59.0M
          0.3085497062849487f,
322
59.0M
          0.0000000000000000,
323
59.0M
          -0.0643507165794628f,
324
59.0M
          0.1585450355183970f,
325
59.0M
          -0.5112616136592012f,
326
59.0M
          0.2579236279634129f,
327
59.0M
          0.0000000000000000,
328
59.0M
          -0.0812611176717504f,
329
59.0M
          -0.1856718091610990f,
330
59.0M
          -0.3416446842253373f,
331
59.0M
          0.3302282550303805f,
332
59.0M
          0.0702790691196282f,
333
59.0M
          -0.0741750459581023f,
334
59.0M
      },
335
59.0M
      {
336
59.0M
          0.2500000000000000,
337
59.0M
          -0.1014005039375376f,
338
59.0M
          -0.2929100136981264f,
339
59.0M
          0.0000000000000000,
340
59.0M
          0.0000000000000000,
341
59.0M
          -0.0643507165794627f,
342
59.0M
          0.3935103426921022f,
343
59.0M
          0.0657870154914254f,
344
59.0M
          0.0000000000000000,
345
59.0M
          0.4082482904638634f,
346
59.0M
          0.3078822139579031f,
347
59.0M
          0.3852501370925211f,
348
59.0M
          -0.0857401903551927f,
349
59.0M
          -0.4613374887461554f,
350
59.0M
          0.0000000000000000,
351
59.0M
          0.2191868483885728f,
352
59.0M
      },
353
59.0M
      {
354
59.0M
          0.2500000000000000,
355
59.0M
          -0.1014005039375376f,
356
59.0M
          -0.1137907446044814f,
357
59.0M
          -0.1464291867126654f,
358
59.0M
          0.0000000000000000,
359
59.0M
          -0.0643507165794627f,
360
59.0M
          0.0829816309488214f,
361
59.0M
          0.2388977352334547f,
362
59.0M
          -0.3531238544981624f,
363
59.0M
          0.4082482904638630f,
364
59.0M
          -0.4826689115059858f,
365
59.0M
          -0.1741941265991621f,
366
59.0M
          -0.0476868035022928f,
367
59.0M
          0.1253805944856431f,
368
59.0M
          -0.4326608024727457f,
369
59.0M
          -0.2546827712406641f,
370
59.0M
      },
371
59.0M
      {
372
59.0M
          0.2500000000000000,
373
59.0M
          -0.1014005039375374f,
374
59.0M
          0.0000000000000000,
375
59.0M
          0.4251149611657548f,
376
59.0M
          0.0000000000000000,
377
59.0M
          -0.0643507165794626f,
378
59.0M
          -0.4517556589999480f,
379
59.0M
          0.0000000000000000,
380
59.0M
          -0.6035859033230976f,
381
59.0M
          0.0000000000000000,
382
59.0M
          0.0000000000000000,
383
59.0M
          0.0000000000000000,
384
59.0M
          -0.1426608480880724f,
385
59.0M
          -0.1381354035075845f,
386
59.0M
          0.3487520519930227f,
387
59.0M
          0.1135498731499429f,
388
59.0M
      },
389
59.0M
  };
390
391
59.0M
  const HWY_CAPPED(float, 16) d;
392
177M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
118M
    auto scalar = Zero(d);
394
2.00G
    for (size_t j = 0; j < 16; j++) {
395
1.89G
      auto px = Set(d, pixels[j]);
396
1.89G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.89G
      scalar = MulAdd(px, basis, scalar);
398
1.89G
    }
399
118M
    Store(scalar, d, coeffs + i);
400
118M
  }
401
59.0M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
331k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
331k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
331k
      {
102
331k
          0.2500000000000000,
103
331k
          0.8769029297991420f,
104
331k
          0.0000000000000000,
105
331k
          0.0000000000000000,
106
331k
          0.0000000000000000,
107
331k
          -0.4105377591765233f,
108
331k
          0.0000000000000000,
109
331k
          0.0000000000000000,
110
331k
          0.0000000000000000,
111
331k
          0.0000000000000000,
112
331k
          0.0000000000000000,
113
331k
          0.0000000000000000,
114
331k
          0.0000000000000000,
115
331k
          0.0000000000000000,
116
331k
          0.0000000000000000,
117
331k
          0.0000000000000000,
118
331k
      },
119
331k
      {
120
331k
          0.2500000000000000,
121
331k
          0.2206518106944235f,
122
331k
          0.0000000000000000,
123
331k
          0.0000000000000000,
124
331k
          -0.7071067811865474f,
125
331k
          0.6235485373547691f,
126
331k
          0.0000000000000000,
127
331k
          0.0000000000000000,
128
331k
          0.0000000000000000,
129
331k
          0.0000000000000000,
130
331k
          0.0000000000000000,
131
331k
          0.0000000000000000,
132
331k
          0.0000000000000000,
133
331k
          0.0000000000000000,
134
331k
          0.0000000000000000,
135
331k
          0.0000000000000000,
136
331k
      },
137
331k
      {
138
331k
          0.2500000000000000,
139
331k
          -0.1014005039375376f,
140
331k
          0.4067007583026075f,
141
331k
          -0.2125574805828875f,
142
331k
          0.0000000000000000,
143
331k
          -0.0643507165794627f,
144
331k
          -0.4517556589999482f,
145
331k
          -0.3046847507248690f,
146
331k
          0.3017929516615495f,
147
331k
          0.4082482904638627f,
148
331k
          0.1747866975480809f,
149
331k
          -0.2110560104933578f,
150
331k
          -0.1426608480880726f,
151
331k
          -0.1381354035075859f,
152
331k
          -0.1743760259965107f,
153
331k
          0.1135498731499434f,
154
331k
      },
155
331k
      {
156
331k
          0.2500000000000000,
157
331k
          -0.1014005039375375f,
158
331k
          0.4444481661973445f,
159
331k
          0.3085497062849767f,
160
331k
          0.0000000000000000f,
161
331k
          -0.0643507165794627f,
162
331k
          0.1585450355184006f,
163
331k
          0.5112616136591823f,
164
331k
          0.2579236279634118f,
165
331k
          0.0000000000000000,
166
331k
          0.0812611176717539f,
167
331k
          0.1856718091610980f,
168
331k
          -0.3416446842253372f,
169
331k
          0.3302282550303788f,
170
331k
          0.0702790691196284f,
171
331k
          -0.0741750459581035f,
172
331k
      },
173
331k
      {
174
331k
          0.2500000000000000,
175
331k
          0.2206518106944236f,
176
331k
          0.0000000000000000,
177
331k
          0.0000000000000000,
178
331k
          0.7071067811865476f,
179
331k
          0.6235485373547694f,
180
331k
          0.0000000000000000,
181
331k
          0.0000000000000000,
182
331k
          0.0000000000000000,
183
331k
          0.0000000000000000,
184
331k
          0.0000000000000000,
185
331k
          0.0000000000000000,
186
331k
          0.0000000000000000,
187
331k
          0.0000000000000000,
188
331k
          0.0000000000000000,
189
331k
          0.0000000000000000,
190
331k
      },
191
331k
      {
192
331k
          0.2500000000000000,
193
331k
          -0.1014005039375378f,
194
331k
          0.0000000000000000,
195
331k
          0.4706702258572536f,
196
331k
          0.0000000000000000,
197
331k
          -0.0643507165794628f,
198
331k
          -0.0403851516082220f,
199
331k
          0.0000000000000000,
200
331k
          0.1627234014286620f,
201
331k
          0.0000000000000000,
202
331k
          0.0000000000000000,
203
331k
          0.0000000000000000,
204
331k
          0.7367497537172237f,
205
331k
          0.0875511500058708f,
206
331k
          -0.2921026642334881f,
207
331k
          0.1940289303259434f,
208
331k
      },
209
331k
      {
210
331k
          0.2500000000000000,
211
331k
          -0.1014005039375377f,
212
331k
          0.1957439937204294f,
213
331k
          -0.1621205195722993f,
214
331k
          0.0000000000000000,
215
331k
          -0.0643507165794628f,
216
331k
          0.0074182263792424f,
217
331k
          -0.2904801297289980f,
218
331k
          0.0952002265347504f,
219
331k
          0.0000000000000000,
220
331k
          -0.3675398009862027f,
221
331k
          0.4921585901373873f,
222
331k
          0.2462710772207515f,
223
331k
          -0.0794670660590957f,
224
331k
          0.3623817333531167f,
225
331k
          -0.4351904965232280f,
226
331k
      },
227
331k
      {
228
331k
          0.2500000000000000,
229
331k
          -0.1014005039375376f,
230
331k
          0.2929100136981264f,
231
331k
          0.0000000000000000,
232
331k
          0.0000000000000000,
233
331k
          -0.0643507165794627f,
234
331k
          0.3935103426921017f,
235
331k
          -0.0657870154914280f,
236
331k
          0.0000000000000000,
237
331k
          -0.4082482904638628f,
238
331k
          -0.3078822139579090f,
239
331k
          -0.3852501370925192f,
240
331k
          -0.0857401903551931f,
241
331k
          -0.4613374887461511f,
242
331k
          0.0000000000000000,
243
331k
          0.2191868483885747f,
244
331k
      },
245
331k
      {
246
331k
          0.2500000000000000,
247
331k
          -0.1014005039375376f,
248
331k
          -0.4067007583026072f,
249
331k
          -0.2125574805828705f,
250
331k
          0.0000000000000000,
251
331k
          -0.0643507165794627f,
252
331k
          -0.4517556589999464f,
253
331k
          0.3046847507248840f,
254
331k
          0.3017929516615503f,
255
331k
          -0.4082482904638635f,
256
331k
          -0.1747866975480813f,
257
331k
          0.2110560104933581f,
258
331k
          -0.1426608480880734f,
259
331k
          -0.1381354035075829f,
260
331k
          -0.1743760259965108f,
261
331k
          0.1135498731499426f,
262
331k
      },
263
331k
      {
264
331k
          0.2500000000000000,
265
331k
          -0.1014005039375377f,
266
331k
          -0.1957439937204287f,
267
331k
          -0.1621205195722833f,
268
331k
          0.0000000000000000,
269
331k
          -0.0643507165794628f,
270
331k
          0.0074182263792444f,
271
331k
          0.2904801297290076f,
272
331k
          0.0952002265347505f,
273
331k
          0.0000000000000000,
274
331k
          0.3675398009862011f,
275
331k
          -0.4921585901373891f,
276
331k
          0.2462710772207514f,
277
331k
          -0.0794670660591026f,
278
331k
          0.3623817333531165f,
279
331k
          -0.4351904965232251f,
280
331k
      },
281
331k
      {
282
331k
          0.2500000000000000,
283
331k
          -0.1014005039375375f,
284
331k
          0.0000000000000000,
285
331k
          -0.4706702258572528f,
286
331k
          0.0000000000000000,
287
331k
          -0.0643507165794627f,
288
331k
          0.1107416575309343f,
289
331k
          0.0000000000000000,
290
331k
          -0.1627234014286617f,
291
331k
          0.0000000000000000,
292
331k
          0.0000000000000000,
293
331k
          0.0000000000000000,
294
331k
          0.1488339922711357f,
295
331k
          0.4972464710953509f,
296
331k
          0.2921026642334879f,
297
331k
          0.5550443808910661f,
298
331k
      },
299
331k
      {
300
331k
          0.2500000000000000,
301
331k
          -0.1014005039375377f,
302
331k
          0.1137907446044809f,
303
331k
          -0.1464291867126764f,
304
331k
          0.0000000000000000,
305
331k
          -0.0643507165794628f,
306
331k
          0.0829816309488205f,
307
331k
          -0.2388977352334460f,
308
331k
          -0.3531238544981630f,
309
331k
          -0.4082482904638630f,
310
331k
          0.4826689115059883f,
311
331k
          0.1741941265991622f,
312
331k
          -0.0476868035022925f,
313
331k
          0.1253805944856366f,
314
331k
          -0.4326608024727445f,
315
331k
          -0.2546827712406646f,
316
331k
      },
317
331k
      {
318
331k
          0.2500000000000000,
319
331k
          -0.1014005039375377f,
320
331k
          -0.4444481661973438f,
321
331k
          0.3085497062849487f,
322
331k
          0.0000000000000000,
323
331k
          -0.0643507165794628f,
324
331k
          0.1585450355183970f,
325
331k
          -0.5112616136592012f,
326
331k
          0.2579236279634129f,
327
331k
          0.0000000000000000,
328
331k
          -0.0812611176717504f,
329
331k
          -0.1856718091610990f,
330
331k
          -0.3416446842253373f,
331
331k
          0.3302282550303805f,
332
331k
          0.0702790691196282f,
333
331k
          -0.0741750459581023f,
334
331k
      },
335
331k
      {
336
331k
          0.2500000000000000,
337
331k
          -0.1014005039375376f,
338
331k
          -0.2929100136981264f,
339
331k
          0.0000000000000000,
340
331k
          0.0000000000000000,
341
331k
          -0.0643507165794627f,
342
331k
          0.3935103426921022f,
343
331k
          0.0657870154914254f,
344
331k
          0.0000000000000000,
345
331k
          0.4082482904638634f,
346
331k
          0.3078822139579031f,
347
331k
          0.3852501370925211f,
348
331k
          -0.0857401903551927f,
349
331k
          -0.4613374887461554f,
350
331k
          0.0000000000000000,
351
331k
          0.2191868483885728f,
352
331k
      },
353
331k
      {
354
331k
          0.2500000000000000,
355
331k
          -0.1014005039375376f,
356
331k
          -0.1137907446044814f,
357
331k
          -0.1464291867126654f,
358
331k
          0.0000000000000000,
359
331k
          -0.0643507165794627f,
360
331k
          0.0829816309488214f,
361
331k
          0.2388977352334547f,
362
331k
          -0.3531238544981624f,
363
331k
          0.4082482904638630f,
364
331k
          -0.4826689115059858f,
365
331k
          -0.1741941265991621f,
366
331k
          -0.0476868035022928f,
367
331k
          0.1253805944856431f,
368
331k
          -0.4326608024727457f,
369
331k
          -0.2546827712406641f,
370
331k
      },
371
331k
      {
372
331k
          0.2500000000000000,
373
331k
          -0.1014005039375374f,
374
331k
          0.0000000000000000,
375
331k
          0.4251149611657548f,
376
331k
          0.0000000000000000,
377
331k
          -0.0643507165794626f,
378
331k
          -0.4517556589999480f,
379
331k
          0.0000000000000000,
380
331k
          -0.6035859033230976f,
381
331k
          0.0000000000000000,
382
331k
          0.0000000000000000,
383
331k
          0.0000000000000000,
384
331k
          -0.1426608480880724f,
385
331k
          -0.1381354035075845f,
386
331k
          0.3487520519930227f,
387
331k
          0.1135498731499429f,
388
331k
      },
389
331k
  };
390
391
331k
  const HWY_CAPPED(float, 16) d;
392
993k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
662k
    auto scalar = Zero(d);
394
11.2M
    for (size_t j = 0; j < 16; j++) {
395
10.5M
      auto px = Set(d, pixels[j]);
396
10.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
10.5M
      scalar = MulAdd(px, basis, scalar);
398
10.5M
    }
399
662k
    Store(scalar, d, coeffs + i);
400
662k
  }
401
331k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
331k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
331k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
331k
      {
102
331k
          0.2500000000000000,
103
331k
          0.8769029297991420f,
104
331k
          0.0000000000000000,
105
331k
          0.0000000000000000,
106
331k
          0.0000000000000000,
107
331k
          -0.4105377591765233f,
108
331k
          0.0000000000000000,
109
331k
          0.0000000000000000,
110
331k
          0.0000000000000000,
111
331k
          0.0000000000000000,
112
331k
          0.0000000000000000,
113
331k
          0.0000000000000000,
114
331k
          0.0000000000000000,
115
331k
          0.0000000000000000,
116
331k
          0.0000000000000000,
117
331k
          0.0000000000000000,
118
331k
      },
119
331k
      {
120
331k
          0.2500000000000000,
121
331k
          0.2206518106944235f,
122
331k
          0.0000000000000000,
123
331k
          0.0000000000000000,
124
331k
          -0.7071067811865474f,
125
331k
          0.6235485373547691f,
126
331k
          0.0000000000000000,
127
331k
          0.0000000000000000,
128
331k
          0.0000000000000000,
129
331k
          0.0000000000000000,
130
331k
          0.0000000000000000,
131
331k
          0.0000000000000000,
132
331k
          0.0000000000000000,
133
331k
          0.0000000000000000,
134
331k
          0.0000000000000000,
135
331k
          0.0000000000000000,
136
331k
      },
137
331k
      {
138
331k
          0.2500000000000000,
139
331k
          -0.1014005039375376f,
140
331k
          0.4067007583026075f,
141
331k
          -0.2125574805828875f,
142
331k
          0.0000000000000000,
143
331k
          -0.0643507165794627f,
144
331k
          -0.4517556589999482f,
145
331k
          -0.3046847507248690f,
146
331k
          0.3017929516615495f,
147
331k
          0.4082482904638627f,
148
331k
          0.1747866975480809f,
149
331k
          -0.2110560104933578f,
150
331k
          -0.1426608480880726f,
151
331k
          -0.1381354035075859f,
152
331k
          -0.1743760259965107f,
153
331k
          0.1135498731499434f,
154
331k
      },
155
331k
      {
156
331k
          0.2500000000000000,
157
331k
          -0.1014005039375375f,
158
331k
          0.4444481661973445f,
159
331k
          0.3085497062849767f,
160
331k
          0.0000000000000000f,
161
331k
          -0.0643507165794627f,
162
331k
          0.1585450355184006f,
163
331k
          0.5112616136591823f,
164
331k
          0.2579236279634118f,
165
331k
          0.0000000000000000,
166
331k
          0.0812611176717539f,
167
331k
          0.1856718091610980f,
168
331k
          -0.3416446842253372f,
169
331k
          0.3302282550303788f,
170
331k
          0.0702790691196284f,
171
331k
          -0.0741750459581035f,
172
331k
      },
173
331k
      {
174
331k
          0.2500000000000000,
175
331k
          0.2206518106944236f,
176
331k
          0.0000000000000000,
177
331k
          0.0000000000000000,
178
331k
          0.7071067811865476f,
179
331k
          0.6235485373547694f,
180
331k
          0.0000000000000000,
181
331k
          0.0000000000000000,
182
331k
          0.0000000000000000,
183
331k
          0.0000000000000000,
184
331k
          0.0000000000000000,
185
331k
          0.0000000000000000,
186
331k
          0.0000000000000000,
187
331k
          0.0000000000000000,
188
331k
          0.0000000000000000,
189
331k
          0.0000000000000000,
190
331k
      },
191
331k
      {
192
331k
          0.2500000000000000,
193
331k
          -0.1014005039375378f,
194
331k
          0.0000000000000000,
195
331k
          0.4706702258572536f,
196
331k
          0.0000000000000000,
197
331k
          -0.0643507165794628f,
198
331k
          -0.0403851516082220f,
199
331k
          0.0000000000000000,
200
331k
          0.1627234014286620f,
201
331k
          0.0000000000000000,
202
331k
          0.0000000000000000,
203
331k
          0.0000000000000000,
204
331k
          0.7367497537172237f,
205
331k
          0.0875511500058708f,
206
331k
          -0.2921026642334881f,
207
331k
          0.1940289303259434f,
208
331k
      },
209
331k
      {
210
331k
          0.2500000000000000,
211
331k
          -0.1014005039375377f,
212
331k
          0.1957439937204294f,
213
331k
          -0.1621205195722993f,
214
331k
          0.0000000000000000,
215
331k
          -0.0643507165794628f,
216
331k
          0.0074182263792424f,
217
331k
          -0.2904801297289980f,
218
331k
          0.0952002265347504f,
219
331k
          0.0000000000000000,
220
331k
          -0.3675398009862027f,
221
331k
          0.4921585901373873f,
222
331k
          0.2462710772207515f,
223
331k
          -0.0794670660590957f,
224
331k
          0.3623817333531167f,
225
331k
          -0.4351904965232280f,
226
331k
      },
227
331k
      {
228
331k
          0.2500000000000000,
229
331k
          -0.1014005039375376f,
230
331k
          0.2929100136981264f,
231
331k
          0.0000000000000000,
232
331k
          0.0000000000000000,
233
331k
          -0.0643507165794627f,
234
331k
          0.3935103426921017f,
235
331k
          -0.0657870154914280f,
236
331k
          0.0000000000000000,
237
331k
          -0.4082482904638628f,
238
331k
          -0.3078822139579090f,
239
331k
          -0.3852501370925192f,
240
331k
          -0.0857401903551931f,
241
331k
          -0.4613374887461511f,
242
331k
          0.0000000000000000,
243
331k
          0.2191868483885747f,
244
331k
      },
245
331k
      {
246
331k
          0.2500000000000000,
247
331k
          -0.1014005039375376f,
248
331k
          -0.4067007583026072f,
249
331k
          -0.2125574805828705f,
250
331k
          0.0000000000000000,
251
331k
          -0.0643507165794627f,
252
331k
          -0.4517556589999464f,
253
331k
          0.3046847507248840f,
254
331k
          0.3017929516615503f,
255
331k
          -0.4082482904638635f,
256
331k
          -0.1747866975480813f,
257
331k
          0.2110560104933581f,
258
331k
          -0.1426608480880734f,
259
331k
          -0.1381354035075829f,
260
331k
          -0.1743760259965108f,
261
331k
          0.1135498731499426f,
262
331k
      },
263
331k
      {
264
331k
          0.2500000000000000,
265
331k
          -0.1014005039375377f,
266
331k
          -0.1957439937204287f,
267
331k
          -0.1621205195722833f,
268
331k
          0.0000000000000000,
269
331k
          -0.0643507165794628f,
270
331k
          0.0074182263792444f,
271
331k
          0.2904801297290076f,
272
331k
          0.0952002265347505f,
273
331k
          0.0000000000000000,
274
331k
          0.3675398009862011f,
275
331k
          -0.4921585901373891f,
276
331k
          0.2462710772207514f,
277
331k
          -0.0794670660591026f,
278
331k
          0.3623817333531165f,
279
331k
          -0.4351904965232251f,
280
331k
      },
281
331k
      {
282
331k
          0.2500000000000000,
283
331k
          -0.1014005039375375f,
284
331k
          0.0000000000000000,
285
331k
          -0.4706702258572528f,
286
331k
          0.0000000000000000,
287
331k
          -0.0643507165794627f,
288
331k
          0.1107416575309343f,
289
331k
          0.0000000000000000,
290
331k
          -0.1627234014286617f,
291
331k
          0.0000000000000000,
292
331k
          0.0000000000000000,
293
331k
          0.0000000000000000,
294
331k
          0.1488339922711357f,
295
331k
          0.4972464710953509f,
296
331k
          0.2921026642334879f,
297
331k
          0.5550443808910661f,
298
331k
      },
299
331k
      {
300
331k
          0.2500000000000000,
301
331k
          -0.1014005039375377f,
302
331k
          0.1137907446044809f,
303
331k
          -0.1464291867126764f,
304
331k
          0.0000000000000000,
305
331k
          -0.0643507165794628f,
306
331k
          0.0829816309488205f,
307
331k
          -0.2388977352334460f,
308
331k
          -0.3531238544981630f,
309
331k
          -0.4082482904638630f,
310
331k
          0.4826689115059883f,
311
331k
          0.1741941265991622f,
312
331k
          -0.0476868035022925f,
313
331k
          0.1253805944856366f,
314
331k
          -0.4326608024727445f,
315
331k
          -0.2546827712406646f,
316
331k
      },
317
331k
      {
318
331k
          0.2500000000000000,
319
331k
          -0.1014005039375377f,
320
331k
          -0.4444481661973438f,
321
331k
          0.3085497062849487f,
322
331k
          0.0000000000000000,
323
331k
          -0.0643507165794628f,
324
331k
          0.1585450355183970f,
325
331k
          -0.5112616136592012f,
326
331k
          0.2579236279634129f,
327
331k
          0.0000000000000000,
328
331k
          -0.0812611176717504f,
329
331k
          -0.1856718091610990f,
330
331k
          -0.3416446842253373f,
331
331k
          0.3302282550303805f,
332
331k
          0.0702790691196282f,
333
331k
          -0.0741750459581023f,
334
331k
      },
335
331k
      {
336
331k
          0.2500000000000000,
337
331k
          -0.1014005039375376f,
338
331k
          -0.2929100136981264f,
339
331k
          0.0000000000000000,
340
331k
          0.0000000000000000,
341
331k
          -0.0643507165794627f,
342
331k
          0.3935103426921022f,
343
331k
          0.0657870154914254f,
344
331k
          0.0000000000000000,
345
331k
          0.4082482904638634f,
346
331k
          0.3078822139579031f,
347
331k
          0.3852501370925211f,
348
331k
          -0.0857401903551927f,
349
331k
          -0.4613374887461554f,
350
331k
          0.0000000000000000,
351
331k
          0.2191868483885728f,
352
331k
      },
353
331k
      {
354
331k
          0.2500000000000000,
355
331k
          -0.1014005039375376f,
356
331k
          -0.1137907446044814f,
357
331k
          -0.1464291867126654f,
358
331k
          0.0000000000000000,
359
331k
          -0.0643507165794627f,
360
331k
          0.0829816309488214f,
361
331k
          0.2388977352334547f,
362
331k
          -0.3531238544981624f,
363
331k
          0.4082482904638630f,
364
331k
          -0.4826689115059858f,
365
331k
          -0.1741941265991621f,
366
331k
          -0.0476868035022928f,
367
331k
          0.1253805944856431f,
368
331k
          -0.4326608024727457f,
369
331k
          -0.2546827712406641f,
370
331k
      },
371
331k
      {
372
331k
          0.2500000000000000,
373
331k
          -0.1014005039375374f,
374
331k
          0.0000000000000000,
375
331k
          0.4251149611657548f,
376
331k
          0.0000000000000000,
377
331k
          -0.0643507165794626f,
378
331k
          -0.4517556589999480f,
379
331k
          0.0000000000000000,
380
331k
          -0.6035859033230976f,
381
331k
          0.0000000000000000,
382
331k
          0.0000000000000000,
383
331k
          0.0000000000000000,
384
331k
          -0.1426608480880724f,
385
331k
          -0.1381354035075845f,
386
331k
          0.3487520519930227f,
387
331k
          0.1135498731499429f,
388
331k
      },
389
331k
  };
390
391
331k
  const HWY_CAPPED(float, 16) d;
392
993k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
662k
    auto scalar = Zero(d);
394
11.2M
    for (size_t j = 0; j < 16; j++) {
395
10.5M
      auto px = Set(d, pixels[j]);
396
10.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
10.5M
      scalar = MulAdd(px, basis, scalar);
398
10.5M
    }
399
662k
    Store(scalar, d, coeffs + i);
400
662k
  }
401
331k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
58.4M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
58.4M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
58.4M
      {
102
58.4M
          0.2500000000000000,
103
58.4M
          0.8769029297991420f,
104
58.4M
          0.0000000000000000,
105
58.4M
          0.0000000000000000,
106
58.4M
          0.0000000000000000,
107
58.4M
          -0.4105377591765233f,
108
58.4M
          0.0000000000000000,
109
58.4M
          0.0000000000000000,
110
58.4M
          0.0000000000000000,
111
58.4M
          0.0000000000000000,
112
58.4M
          0.0000000000000000,
113
58.4M
          0.0000000000000000,
114
58.4M
          0.0000000000000000,
115
58.4M
          0.0000000000000000,
116
58.4M
          0.0000000000000000,
117
58.4M
          0.0000000000000000,
118
58.4M
      },
119
58.4M
      {
120
58.4M
          0.2500000000000000,
121
58.4M
          0.2206518106944235f,
122
58.4M
          0.0000000000000000,
123
58.4M
          0.0000000000000000,
124
58.4M
          -0.7071067811865474f,
125
58.4M
          0.6235485373547691f,
126
58.4M
          0.0000000000000000,
127
58.4M
          0.0000000000000000,
128
58.4M
          0.0000000000000000,
129
58.4M
          0.0000000000000000,
130
58.4M
          0.0000000000000000,
131
58.4M
          0.0000000000000000,
132
58.4M
          0.0000000000000000,
133
58.4M
          0.0000000000000000,
134
58.4M
          0.0000000000000000,
135
58.4M
          0.0000000000000000,
136
58.4M
      },
137
58.4M
      {
138
58.4M
          0.2500000000000000,
139
58.4M
          -0.1014005039375376f,
140
58.4M
          0.4067007583026075f,
141
58.4M
          -0.2125574805828875f,
142
58.4M
          0.0000000000000000,
143
58.4M
          -0.0643507165794627f,
144
58.4M
          -0.4517556589999482f,
145
58.4M
          -0.3046847507248690f,
146
58.4M
          0.3017929516615495f,
147
58.4M
          0.4082482904638627f,
148
58.4M
          0.1747866975480809f,
149
58.4M
          -0.2110560104933578f,
150
58.4M
          -0.1426608480880726f,
151
58.4M
          -0.1381354035075859f,
152
58.4M
          -0.1743760259965107f,
153
58.4M
          0.1135498731499434f,
154
58.4M
      },
155
58.4M
      {
156
58.4M
          0.2500000000000000,
157
58.4M
          -0.1014005039375375f,
158
58.4M
          0.4444481661973445f,
159
58.4M
          0.3085497062849767f,
160
58.4M
          0.0000000000000000f,
161
58.4M
          -0.0643507165794627f,
162
58.4M
          0.1585450355184006f,
163
58.4M
          0.5112616136591823f,
164
58.4M
          0.2579236279634118f,
165
58.4M
          0.0000000000000000,
166
58.4M
          0.0812611176717539f,
167
58.4M
          0.1856718091610980f,
168
58.4M
          -0.3416446842253372f,
169
58.4M
          0.3302282550303788f,
170
58.4M
          0.0702790691196284f,
171
58.4M
          -0.0741750459581035f,
172
58.4M
      },
173
58.4M
      {
174
58.4M
          0.2500000000000000,
175
58.4M
          0.2206518106944236f,
176
58.4M
          0.0000000000000000,
177
58.4M
          0.0000000000000000,
178
58.4M
          0.7071067811865476f,
179
58.4M
          0.6235485373547694f,
180
58.4M
          0.0000000000000000,
181
58.4M
          0.0000000000000000,
182
58.4M
          0.0000000000000000,
183
58.4M
          0.0000000000000000,
184
58.4M
          0.0000000000000000,
185
58.4M
          0.0000000000000000,
186
58.4M
          0.0000000000000000,
187
58.4M
          0.0000000000000000,
188
58.4M
          0.0000000000000000,
189
58.4M
          0.0000000000000000,
190
58.4M
      },
191
58.4M
      {
192
58.4M
          0.2500000000000000,
193
58.4M
          -0.1014005039375378f,
194
58.4M
          0.0000000000000000,
195
58.4M
          0.4706702258572536f,
196
58.4M
          0.0000000000000000,
197
58.4M
          -0.0643507165794628f,
198
58.4M
          -0.0403851516082220f,
199
58.4M
          0.0000000000000000,
200
58.4M
          0.1627234014286620f,
201
58.4M
          0.0000000000000000,
202
58.4M
          0.0000000000000000,
203
58.4M
          0.0000000000000000,
204
58.4M
          0.7367497537172237f,
205
58.4M
          0.0875511500058708f,
206
58.4M
          -0.2921026642334881f,
207
58.4M
          0.1940289303259434f,
208
58.4M
      },
209
58.4M
      {
210
58.4M
          0.2500000000000000,
211
58.4M
          -0.1014005039375377f,
212
58.4M
          0.1957439937204294f,
213
58.4M
          -0.1621205195722993f,
214
58.4M
          0.0000000000000000,
215
58.4M
          -0.0643507165794628f,
216
58.4M
          0.0074182263792424f,
217
58.4M
          -0.2904801297289980f,
218
58.4M
          0.0952002265347504f,
219
58.4M
          0.0000000000000000,
220
58.4M
          -0.3675398009862027f,
221
58.4M
          0.4921585901373873f,
222
58.4M
          0.2462710772207515f,
223
58.4M
          -0.0794670660590957f,
224
58.4M
          0.3623817333531167f,
225
58.4M
          -0.4351904965232280f,
226
58.4M
      },
227
58.4M
      {
228
58.4M
          0.2500000000000000,
229
58.4M
          -0.1014005039375376f,
230
58.4M
          0.2929100136981264f,
231
58.4M
          0.0000000000000000,
232
58.4M
          0.0000000000000000,
233
58.4M
          -0.0643507165794627f,
234
58.4M
          0.3935103426921017f,
235
58.4M
          -0.0657870154914280f,
236
58.4M
          0.0000000000000000,
237
58.4M
          -0.4082482904638628f,
238
58.4M
          -0.3078822139579090f,
239
58.4M
          -0.3852501370925192f,
240
58.4M
          -0.0857401903551931f,
241
58.4M
          -0.4613374887461511f,
242
58.4M
          0.0000000000000000,
243
58.4M
          0.2191868483885747f,
244
58.4M
      },
245
58.4M
      {
246
58.4M
          0.2500000000000000,
247
58.4M
          -0.1014005039375376f,
248
58.4M
          -0.4067007583026072f,
249
58.4M
          -0.2125574805828705f,
250
58.4M
          0.0000000000000000,
251
58.4M
          -0.0643507165794627f,
252
58.4M
          -0.4517556589999464f,
253
58.4M
          0.3046847507248840f,
254
58.4M
          0.3017929516615503f,
255
58.4M
          -0.4082482904638635f,
256
58.4M
          -0.1747866975480813f,
257
58.4M
          0.2110560104933581f,
258
58.4M
          -0.1426608480880734f,
259
58.4M
          -0.1381354035075829f,
260
58.4M
          -0.1743760259965108f,
261
58.4M
          0.1135498731499426f,
262
58.4M
      },
263
58.4M
      {
264
58.4M
          0.2500000000000000,
265
58.4M
          -0.1014005039375377f,
266
58.4M
          -0.1957439937204287f,
267
58.4M
          -0.1621205195722833f,
268
58.4M
          0.0000000000000000,
269
58.4M
          -0.0643507165794628f,
270
58.4M
          0.0074182263792444f,
271
58.4M
          0.2904801297290076f,
272
58.4M
          0.0952002265347505f,
273
58.4M
          0.0000000000000000,
274
58.4M
          0.3675398009862011f,
275
58.4M
          -0.4921585901373891f,
276
58.4M
          0.2462710772207514f,
277
58.4M
          -0.0794670660591026f,
278
58.4M
          0.3623817333531165f,
279
58.4M
          -0.4351904965232251f,
280
58.4M
      },
281
58.4M
      {
282
58.4M
          0.2500000000000000,
283
58.4M
          -0.1014005039375375f,
284
58.4M
          0.0000000000000000,
285
58.4M
          -0.4706702258572528f,
286
58.4M
          0.0000000000000000,
287
58.4M
          -0.0643507165794627f,
288
58.4M
          0.1107416575309343f,
289
58.4M
          0.0000000000000000,
290
58.4M
          -0.1627234014286617f,
291
58.4M
          0.0000000000000000,
292
58.4M
          0.0000000000000000,
293
58.4M
          0.0000000000000000,
294
58.4M
          0.1488339922711357f,
295
58.4M
          0.4972464710953509f,
296
58.4M
          0.2921026642334879f,
297
58.4M
          0.5550443808910661f,
298
58.4M
      },
299
58.4M
      {
300
58.4M
          0.2500000000000000,
301
58.4M
          -0.1014005039375377f,
302
58.4M
          0.1137907446044809f,
303
58.4M
          -0.1464291867126764f,
304
58.4M
          0.0000000000000000,
305
58.4M
          -0.0643507165794628f,
306
58.4M
          0.0829816309488205f,
307
58.4M
          -0.2388977352334460f,
308
58.4M
          -0.3531238544981630f,
309
58.4M
          -0.4082482904638630f,
310
58.4M
          0.4826689115059883f,
311
58.4M
          0.1741941265991622f,
312
58.4M
          -0.0476868035022925f,
313
58.4M
          0.1253805944856366f,
314
58.4M
          -0.4326608024727445f,
315
58.4M
          -0.2546827712406646f,
316
58.4M
      },
317
58.4M
      {
318
58.4M
          0.2500000000000000,
319
58.4M
          -0.1014005039375377f,
320
58.4M
          -0.4444481661973438f,
321
58.4M
          0.3085497062849487f,
322
58.4M
          0.0000000000000000,
323
58.4M
          -0.0643507165794628f,
324
58.4M
          0.1585450355183970f,
325
58.4M
          -0.5112616136592012f,
326
58.4M
          0.2579236279634129f,
327
58.4M
          0.0000000000000000,
328
58.4M
          -0.0812611176717504f,
329
58.4M
          -0.1856718091610990f,
330
58.4M
          -0.3416446842253373f,
331
58.4M
          0.3302282550303805f,
332
58.4M
          0.0702790691196282f,
333
58.4M
          -0.0741750459581023f,
334
58.4M
      },
335
58.4M
      {
336
58.4M
          0.2500000000000000,
337
58.4M
          -0.1014005039375376f,
338
58.4M
          -0.2929100136981264f,
339
58.4M
          0.0000000000000000,
340
58.4M
          0.0000000000000000,
341
58.4M
          -0.0643507165794627f,
342
58.4M
          0.3935103426921022f,
343
58.4M
          0.0657870154914254f,
344
58.4M
          0.0000000000000000,
345
58.4M
          0.4082482904638634f,
346
58.4M
          0.3078822139579031f,
347
58.4M
          0.3852501370925211f,
348
58.4M
          -0.0857401903551927f,
349
58.4M
          -0.4613374887461554f,
350
58.4M
          0.0000000000000000,
351
58.4M
          0.2191868483885728f,
352
58.4M
      },
353
58.4M
      {
354
58.4M
          0.2500000000000000,
355
58.4M
          -0.1014005039375376f,
356
58.4M
          -0.1137907446044814f,
357
58.4M
          -0.1464291867126654f,
358
58.4M
          0.0000000000000000,
359
58.4M
          -0.0643507165794627f,
360
58.4M
          0.0829816309488214f,
361
58.4M
          0.2388977352334547f,
362
58.4M
          -0.3531238544981624f,
363
58.4M
          0.4082482904638630f,
364
58.4M
          -0.4826689115059858f,
365
58.4M
          -0.1741941265991621f,
366
58.4M
          -0.0476868035022928f,
367
58.4M
          0.1253805944856431f,
368
58.4M
          -0.4326608024727457f,
369
58.4M
          -0.2546827712406641f,
370
58.4M
      },
371
58.4M
      {
372
58.4M
          0.2500000000000000,
373
58.4M
          -0.1014005039375374f,
374
58.4M
          0.0000000000000000,
375
58.4M
          0.4251149611657548f,
376
58.4M
          0.0000000000000000,
377
58.4M
          -0.0643507165794626f,
378
58.4M
          -0.4517556589999480f,
379
58.4M
          0.0000000000000000,
380
58.4M
          -0.6035859033230976f,
381
58.4M
          0.0000000000000000,
382
58.4M
          0.0000000000000000,
383
58.4M
          0.0000000000000000,
384
58.4M
          -0.1426608480880724f,
385
58.4M
          -0.1381354035075845f,
386
58.4M
          0.3487520519930227f,
387
58.4M
          0.1135498731499429f,
388
58.4M
      },
389
58.4M
  };
390
391
58.4M
  const HWY_CAPPED(float, 16) d;
392
175M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
116M
    auto scalar = Zero(d);
394
1.98G
    for (size_t j = 0; j < 16; j++) {
395
1.86G
      auto px = Set(d, pixels[j]);
396
1.86G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.86G
      scalar = MulAdd(px, basis, scalar);
398
1.86G
    }
399
116M
    Store(scalar, d, coeffs + i);
400
116M
  }
401
58.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
59.0M
                            float* JXL_RESTRICT coefficients) {
411
59.0M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
59.0M
  size_t afv_x = afv_kind & 1;
413
59.0M
  size_t afv_y = afv_kind / 2;
414
59.0M
  HWY_ALIGN float block[4 * 8] = {};
415
295M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.18G
    for (size_t ix = 0; ix < 4; ix++) {
417
945M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
945M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
945M
    }
420
236M
  }
421
  // AFV coefficients in (even, even) positions.
422
59.0M
  HWY_ALIGN float coeff[4 * 4];
423
59.0M
  AFVDCT4x4(block, coeff);
424
295M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.18G
    for (size_t ix = 0; ix < 4; ix++) {
426
945M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
945M
    }
428
236M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
59.0M
  ComputeScaledDCT<4, 4>()(
431
59.0M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
59.0M
              pixels_stride),
433
59.0M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
295M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.12G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.89G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.89G
    }
439
236M
  }
440
  // 4x8 DCT of the other half of the block.
441
59.0M
  ComputeScaledDCT<4, 8>()(
442
59.0M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
59.0M
      block, scratch_space);
444
295M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.12G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.89G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.89G
    }
448
236M
  }
449
59.0M
  float block00 = coefficients[0] * 0.25f;
450
59.0M
  float block01 = coefficients[1];
451
59.0M
  float block10 = coefficients[8];
452
59.0M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
59.0M
  coefficients[1] = (block00 - block01) * 0.5f;
454
59.0M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
59.0M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
106k
                            float* JXL_RESTRICT coefficients) {
411
106k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
106k
  size_t afv_x = afv_kind & 1;
413
106k
  size_t afv_y = afv_kind / 2;
414
106k
  HWY_ALIGN float block[4 * 8] = {};
415
534k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.13M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.71M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.71M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.71M
    }
420
427k
  }
421
  // AFV coefficients in (even, even) positions.
422
106k
  HWY_ALIGN float coeff[4 * 4];
423
106k
  AFVDCT4x4(block, coeff);
424
534k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.13M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.71M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.71M
    }
428
427k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
106k
  ComputeScaledDCT<4, 4>()(
431
106k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
106k
              pixels_stride),
433
106k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
534k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.42M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.42M
    }
439
427k
  }
440
  // 4x8 DCT of the other half of the block.
441
106k
  ComputeScaledDCT<4, 8>()(
442
106k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
106k
      block, scratch_space);
444
534k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.42M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.42M
    }
448
427k
  }
449
106k
  float block00 = coefficients[0] * 0.25f;
450
106k
  float block01 = coefficients[1];
451
106k
  float block10 = coefficients[8];
452
106k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
106k
  coefficients[1] = (block00 - block01) * 0.5f;
454
106k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
106k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
62.9k
                            float* JXL_RESTRICT coefficients) {
411
62.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
62.9k
  size_t afv_x = afv_kind & 1;
413
62.9k
  size_t afv_y = afv_kind / 2;
414
62.9k
  HWY_ALIGN float block[4 * 8] = {};
415
314k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.25M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.00M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.00M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.00M
    }
420
251k
  }
421
  // AFV coefficients in (even, even) positions.
422
62.9k
  HWY_ALIGN float coeff[4 * 4];
423
62.9k
  AFVDCT4x4(block, coeff);
424
314k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.25M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.00M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.00M
    }
428
251k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
62.9k
  ComputeScaledDCT<4, 4>()(
431
62.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
62.9k
              pixels_stride),
433
62.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
314k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.26M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.01M
    }
439
251k
  }
440
  // 4x8 DCT of the other half of the block.
441
62.9k
  ComputeScaledDCT<4, 8>()(
442
62.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
62.9k
      block, scratch_space);
444
314k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.26M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.01M
    }
448
251k
  }
449
62.9k
  float block00 = coefficients[0] * 0.25f;
450
62.9k
  float block01 = coefficients[1];
451
62.9k
  float block10 = coefficients[8];
452
62.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
62.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
62.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
62.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
77.1k
                            float* JXL_RESTRICT coefficients) {
411
77.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
77.1k
  size_t afv_x = afv_kind & 1;
413
77.1k
  size_t afv_y = afv_kind / 2;
414
77.1k
  HWY_ALIGN float block[4 * 8] = {};
415
385k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.54M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.23M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.23M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.23M
    }
420
308k
  }
421
  // AFV coefficients in (even, even) positions.
422
77.1k
  HWY_ALIGN float coeff[4 * 4];
423
77.1k
  AFVDCT4x4(block, coeff);
424
385k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.54M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.23M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.23M
    }
428
308k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
77.1k
  ComputeScaledDCT<4, 4>()(
431
77.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
77.1k
              pixels_stride),
433
77.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
385k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.77M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.46M
    }
439
308k
  }
440
  // 4x8 DCT of the other half of the block.
441
77.1k
  ComputeScaledDCT<4, 8>()(
442
77.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
77.1k
      block, scratch_space);
444
385k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.77M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.46M
    }
448
308k
  }
449
77.1k
  float block00 = coefficients[0] * 0.25f;
450
77.1k
  float block01 = coefficients[1];
451
77.1k
  float block10 = coefficients[8];
452
77.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
77.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
77.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
77.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
84.0k
                            float* JXL_RESTRICT coefficients) {
411
84.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
84.0k
  size_t afv_x = afv_kind & 1;
413
84.0k
  size_t afv_y = afv_kind / 2;
414
84.0k
  HWY_ALIGN float block[4 * 8] = {};
415
420k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.68M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.34M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.34M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.34M
    }
420
336k
  }
421
  // AFV coefficients in (even, even) positions.
422
84.0k
  HWY_ALIGN float coeff[4 * 4];
423
84.0k
  AFVDCT4x4(block, coeff);
424
420k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.68M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.34M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.34M
    }
428
336k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
84.0k
  ComputeScaledDCT<4, 4>()(
431
84.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
84.0k
              pixels_stride),
433
84.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
420k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.02M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.69M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.69M
    }
439
336k
  }
440
  // 4x8 DCT of the other half of the block.
441
84.0k
  ComputeScaledDCT<4, 8>()(
442
84.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
84.0k
      block, scratch_space);
444
420k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.02M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.69M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.69M
    }
448
336k
  }
449
84.0k
  float block00 = coefficients[0] * 0.25f;
450
84.0k
  float block01 = coefficients[1];
451
84.0k
  float block10 = coefficients[8];
452
84.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
84.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
84.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
84.0k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
106k
                            float* JXL_RESTRICT coefficients) {
411
106k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
106k
  size_t afv_x = afv_kind & 1;
413
106k
  size_t afv_y = afv_kind / 2;
414
106k
  HWY_ALIGN float block[4 * 8] = {};
415
534k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.13M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.71M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.71M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.71M
    }
420
427k
  }
421
  // AFV coefficients in (even, even) positions.
422
106k
  HWY_ALIGN float coeff[4 * 4];
423
106k
  AFVDCT4x4(block, coeff);
424
534k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.13M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.71M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.71M
    }
428
427k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
106k
  ComputeScaledDCT<4, 4>()(
431
106k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
106k
              pixels_stride),
433
106k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
534k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.85M
    for (size_t ix = 0; ix < 8; ix++) {
437
3.42M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
3.42M
    }
439
427k
  }
440
  // 4x8 DCT of the other half of the block.
441
106k
  ComputeScaledDCT<4, 8>()(
442
106k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
106k
      block, scratch_space);
444
534k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.85M
    for (size_t ix = 0; ix < 8; ix++) {
446
3.42M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
3.42M
    }
448
427k
  }
449
106k
  float block00 = coefficients[0] * 0.25f;
450
106k
  float block01 = coefficients[1];
451
106k
  float block10 = coefficients[8];
452
106k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
106k
  coefficients[1] = (block00 - block01) * 0.5f;
454
106k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
106k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
62.9k
                            float* JXL_RESTRICT coefficients) {
411
62.9k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
62.9k
  size_t afv_x = afv_kind & 1;
413
62.9k
  size_t afv_y = afv_kind / 2;
414
62.9k
  HWY_ALIGN float block[4 * 8] = {};
415
314k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.25M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.00M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.00M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.00M
    }
420
251k
  }
421
  // AFV coefficients in (even, even) positions.
422
62.9k
  HWY_ALIGN float coeff[4 * 4];
423
62.9k
  AFVDCT4x4(block, coeff);
424
314k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.25M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.00M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.00M
    }
428
251k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
62.9k
  ComputeScaledDCT<4, 4>()(
431
62.9k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
62.9k
              pixels_stride),
433
62.9k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
314k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.26M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.01M
    }
439
251k
  }
440
  // 4x8 DCT of the other half of the block.
441
62.9k
  ComputeScaledDCT<4, 8>()(
442
62.9k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
62.9k
      block, scratch_space);
444
314k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.26M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.01M
    }
448
251k
  }
449
62.9k
  float block00 = coefficients[0] * 0.25f;
450
62.9k
  float block01 = coefficients[1];
451
62.9k
  float block10 = coefficients[8];
452
62.9k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
62.9k
  coefficients[1] = (block00 - block01) * 0.5f;
454
62.9k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
62.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
77.1k
                            float* JXL_RESTRICT coefficients) {
411
77.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
77.1k
  size_t afv_x = afv_kind & 1;
413
77.1k
  size_t afv_y = afv_kind / 2;
414
77.1k
  HWY_ALIGN float block[4 * 8] = {};
415
385k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.54M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.23M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.23M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.23M
    }
420
308k
  }
421
  // AFV coefficients in (even, even) positions.
422
77.1k
  HWY_ALIGN float coeff[4 * 4];
423
77.1k
  AFVDCT4x4(block, coeff);
424
385k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.54M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.23M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.23M
    }
428
308k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
77.1k
  ComputeScaledDCT<4, 4>()(
431
77.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
77.1k
              pixels_stride),
433
77.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
385k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.77M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.46M
    }
439
308k
  }
440
  // 4x8 DCT of the other half of the block.
441
77.1k
  ComputeScaledDCT<4, 8>()(
442
77.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
77.1k
      block, scratch_space);
444
385k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.77M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.46M
    }
448
308k
  }
449
77.1k
  float block00 = coefficients[0] * 0.25f;
450
77.1k
  float block01 = coefficients[1];
451
77.1k
  float block10 = coefficients[8];
452
77.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
77.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
77.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
77.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
84.0k
                            float* JXL_RESTRICT coefficients) {
411
84.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
84.0k
  size_t afv_x = afv_kind & 1;
413
84.0k
  size_t afv_y = afv_kind / 2;
414
84.0k
  HWY_ALIGN float block[4 * 8] = {};
415
420k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.68M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.34M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.34M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.34M
    }
420
336k
  }
421
  // AFV coefficients in (even, even) positions.
422
84.0k
  HWY_ALIGN float coeff[4 * 4];
423
84.0k
  AFVDCT4x4(block, coeff);
424
420k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.68M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.34M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.34M
    }
428
336k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
84.0k
  ComputeScaledDCT<4, 4>()(
431
84.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
84.0k
              pixels_stride),
433
84.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
420k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.02M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.69M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.69M
    }
439
336k
  }
440
  // 4x8 DCT of the other half of the block.
441
84.0k
  ComputeScaledDCT<4, 8>()(
442
84.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
84.0k
      block, scratch_space);
444
420k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.02M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.69M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.69M
    }
448
336k
  }
449
84.0k
  float block00 = coefficients[0] * 0.25f;
450
84.0k
  float block01 = coefficients[1];
451
84.0k
  float block10 = coefficients[8];
452
84.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
84.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
84.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
84.0k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6M
                            float* JXL_RESTRICT coefficients) {
411
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6M
  size_t afv_x = afv_kind & 1;
413
14.6M
  size_t afv_y = afv_kind / 2;
414
14.6M
  HWY_ALIGN float block[4 * 8] = {};
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
233M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
233M
    }
420
58.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6M
  HWY_ALIGN float coeff[4 * 4];
423
14.6M
  AFVDCT4x4(block, coeff);
424
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
292M
    for (size_t ix = 0; ix < 4; ix++) {
426
233M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
233M
    }
428
58.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6M
  ComputeScaledDCT<4, 4>()(
431
14.6M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6M
              pixels_stride),
433
14.6M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
525M
    for (size_t ix = 0; ix < 8; ix++) {
437
467M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
467M
    }
439
58.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6M
  ComputeScaledDCT<4, 8>()(
442
14.6M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6M
      block, scratch_space);
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
467M
    }
448
58.4M
  }
449
14.6M
  float block00 = coefficients[0] * 0.25f;
450
14.6M
  float block01 = coefficients[1];
451
14.6M
  float block10 = coefficients[8];
452
14.6M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6M
                            float* JXL_RESTRICT coefficients) {
411
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6M
  size_t afv_x = afv_kind & 1;
413
14.6M
  size_t afv_y = afv_kind / 2;
414
14.6M
  HWY_ALIGN float block[4 * 8] = {};
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
233M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
233M
    }
420
58.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6M
  HWY_ALIGN float coeff[4 * 4];
423
14.6M
  AFVDCT4x4(block, coeff);
424
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
292M
    for (size_t ix = 0; ix < 4; ix++) {
426
233M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
233M
    }
428
58.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6M
  ComputeScaledDCT<4, 4>()(
431
14.6M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6M
              pixels_stride),
433
14.6M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
525M
    for (size_t ix = 0; ix < 8; ix++) {
437
467M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
467M
    }
439
58.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6M
  ComputeScaledDCT<4, 8>()(
442
14.6M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6M
      block, scratch_space);
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
467M
    }
448
58.4M
  }
449
14.6M
  float block00 = coefficients[0] * 0.25f;
450
14.6M
  float block01 = coefficients[1];
451
14.6M
  float block10 = coefficients[8];
452
14.6M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6M
                            float* JXL_RESTRICT coefficients) {
411
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6M
  size_t afv_x = afv_kind & 1;
413
14.6M
  size_t afv_y = afv_kind / 2;
414
14.6M
  HWY_ALIGN float block[4 * 8] = {};
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
233M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
233M
    }
420
58.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6M
  HWY_ALIGN float coeff[4 * 4];
423
14.6M
  AFVDCT4x4(block, coeff);
424
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
292M
    for (size_t ix = 0; ix < 4; ix++) {
426
233M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
233M
    }
428
58.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6M
  ComputeScaledDCT<4, 4>()(
431
14.6M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6M
              pixels_stride),
433
14.6M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
525M
    for (size_t ix = 0; ix < 8; ix++) {
437
467M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
467M
    }
439
58.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6M
  ComputeScaledDCT<4, 8>()(
442
14.6M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6M
      block, scratch_space);
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
467M
    }
448
58.4M
  }
449
14.6M
  float block00 = coefficients[0] * 0.25f;
450
14.6M
  float block01 = coefficients[1];
451
14.6M
  float block10 = coefficients[8];
452
14.6M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.6M
                            float* JXL_RESTRICT coefficients) {
411
14.6M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.6M
  size_t afv_x = afv_kind & 1;
413
14.6M
  size_t afv_y = afv_kind / 2;
414
14.6M
  HWY_ALIGN float block[4 * 8] = {};
415
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
416
292M
    for (size_t ix = 0; ix < 4; ix++) {
417
233M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
233M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
233M
    }
420
58.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.6M
  HWY_ALIGN float coeff[4 * 4];
423
14.6M
  AFVDCT4x4(block, coeff);
424
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
425
292M
    for (size_t ix = 0; ix < 4; ix++) {
426
233M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
233M
    }
428
58.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.6M
  ComputeScaledDCT<4, 4>()(
431
14.6M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.6M
              pixels_stride),
433
14.6M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
436
525M
    for (size_t ix = 0; ix < 8; ix++) {
437
467M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
467M
    }
439
58.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.6M
  ComputeScaledDCT<4, 8>()(
442
14.6M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.6M
      block, scratch_space);
444
73.0M
  for (size_t iy = 0; iy < 4; iy++) {
445
525M
    for (size_t ix = 0; ix < 8; ix++) {
446
467M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
467M
    }
448
58.4M
  }
449
14.6M
  float block00 = coefficients[0] * 0.25f;
450
14.6M
  float block01 = coefficients[1];
451
14.6M
  float block10 = coefficients[8];
452
14.6M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.6M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.6M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
209M
                                          float* JXL_RESTRICT scratch_space) {
462
209M
  using Type = AcStrategyType;
463
209M
  switch (strategy) {
464
16.1M
    case Type::IDENTITY: {
465
48.3M
      for (size_t y = 0; y < 2; y++) {
466
96.6M
        for (size_t x = 0; x < 2; x++) {
467
64.4M
          float block_dc = 0;
468
322M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.28G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.03G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.03G
            }
472
257M
          }
473
64.4M
          block_dc *= 1.0f / 16;
474
322M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.28G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.03G
              if (ix == 1 && iy == 1) continue;
477
966M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
966M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
966M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
966M
            }
481
257M
          }
482
64.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
64.4M
          coefficients[y * 8 + x] = block_dc;
484
64.4M
        }
485
32.2M
      }
486
16.1M
      float block00 = coefficients[0];
487
16.1M
      float block01 = coefficients[1];
488
16.1M
      float block10 = coefficients[8];
489
16.1M
      float block11 = coefficients[9];
490
16.1M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
16.1M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
16.1M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
16.1M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
16.1M
      break;
495
0
    }
496
14.9M
    case Type::DCT8X4: {
497
44.8M
      for (size_t x = 0; x < 2; x++) {
498
29.8M
        HWY_ALIGN float block[4 * 8];
499
29.8M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
29.8M
                                 scratch_space);
501
149M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.07G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
956M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
956M
          }
506
119M
        }
507
29.8M
      }
508
14.9M
      float block0 = coefficients[0];
509
14.9M
      float block1 = coefficients[8];
510
14.9M
      coefficients[0] = (block0 + block1) * 0.5f;
511
14.9M
      coefficients[8] = (block0 - block1) * 0.5f;
512
14.9M
      break;
513
0
    }
514
14.7M
    case Type::DCT4X8: {
515
44.2M
      for (size_t y = 0; y < 2; y++) {
516
29.5M
        HWY_ALIGN float block[4 * 8];
517
29.5M
        ComputeScaledDCT<4, 8>()(
518
29.5M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
29.5M
            scratch_space);
520
147M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.06G
          for (size_t ix = 0; ix < 8; ix++) {
522
944M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
944M
          }
524
118M
        }
525
29.5M
      }
526
14.7M
      float block0 = coefficients[0];
527
14.7M
      float block1 = coefficients[8];
528
14.7M
      coefficients[0] = (block0 + block1) * 0.5f;
529
14.7M
      coefficients[8] = (block0 - block1) * 0.5f;
530
14.7M
      break;
531
0
    }
532
14.6M
    case Type::DCT4X4: {
533
43.8M
      for (size_t y = 0; y < 2; y++) {
534
87.6M
        for (size_t x = 0; x < 2; x++) {
535
58.4M
          HWY_ALIGN float block[4 * 4];
536
58.4M
          ComputeScaledDCT<4, 4>()(
537
58.4M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
58.4M
              block, scratch_space);
539
292M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
541
934M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
934M
            }
543
233M
          }
544
58.4M
        }
545
29.2M
      }
546
14.6M
      float block00 = coefficients[0];
547
14.6M
      float block01 = coefficients[1];
548
14.6M
      float block10 = coefficients[8];
549
14.6M
      float block11 = coefficients[9];
550
14.6M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
14.6M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
14.6M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
14.6M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
14.6M
      break;
555
0
    }
556
19.1M
    case Type::DCT2X2: {
557
19.1M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
19.1M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
19.1M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
19.1M
      break;
561
0
    }
562
6.20M
    case Type::DCT16X16: {
563
6.20M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.20M
                                 scratch_space);
565
6.20M
      break;
566
0
    }
567
12.0M
    case Type::DCT16X8: {
568
12.0M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
12.0M
                                scratch_space);
570
12.0M
      break;
571
0
    }
572
12.0M
    case Type::DCT8X16: {
573
12.0M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
12.0M
                                scratch_space);
575
12.0M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.42M
    case Type::DCT32X16: {
588
2.42M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.42M
                                 scratch_space);
590
2.42M
      break;
591
0
    }
592
2.40M
    case Type::DCT16X32: {
593
2.40M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.40M
                                 scratch_space);
595
2.40M
      break;
596
0
    }
597
1.38M
    case Type::DCT32X32: {
598
1.38M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.38M
                                 scratch_space);
600
1.38M
      break;
601
0
    }
602
32.8M
    case Type::DCT: {
603
32.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
32.8M
                               scratch_space);
605
32.8M
      break;
606
0
    }
607
14.8M
    case Type::AFV0: {
608
14.8M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
14.8M
      break;
610
0
    }
611
14.7M
    case Type::AFV1: {
612
14.7M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.7M
      break;
614
0
    }
615
14.7M
    case Type::AFV2: {
616
14.7M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
14.7M
      break;
618
0
    }
619
14.7M
    case Type::AFV3: {
620
14.7M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.7M
      break;
622
0
    }
623
330k
    case Type::DCT64X64: {
624
330k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
330k
                                 scratch_space);
626
330k
      break;
627
0
    }
628
725k
    case Type::DCT64X32: {
629
725k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
725k
                                 scratch_space);
631
725k
      break;
632
0
    }
633
437k
    case Type::DCT32X64: {
634
437k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
437k
                                 scratch_space);
636
437k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
209M
  }
669
209M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
6.42M
                                          float* JXL_RESTRICT scratch_space) {
462
6.42M
  using Type = AcStrategyType;
463
6.42M
  switch (strategy) {
464
752k
    case Type::IDENTITY: {
465
2.25M
      for (size_t y = 0; y < 2; y++) {
466
4.51M
        for (size_t x = 0; x < 2; x++) {
467
3.01M
          float block_dc = 0;
468
15.0M
          for (size_t iy = 0; iy < 4; iy++) {
469
60.2M
            for (size_t ix = 0; ix < 4; ix++) {
470
48.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
48.1M
            }
472
12.0M
          }
473
3.01M
          block_dc *= 1.0f / 16;
474
15.0M
          for (size_t iy = 0; iy < 4; iy++) {
475
60.2M
            for (size_t ix = 0; ix < 4; ix++) {
476
48.1M
              if (ix == 1 && iy == 1) continue;
477
45.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
45.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
45.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
45.1M
            }
481
12.0M
          }
482
3.01M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.01M
          coefficients[y * 8 + x] = block_dc;
484
3.01M
        }
485
1.50M
      }
486
752k
      float block00 = coefficients[0];
487
752k
      float block01 = coefficients[1];
488
752k
      float block10 = coefficients[8];
489
752k
      float block11 = coefficients[9];
490
752k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
752k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
752k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
752k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
752k
      break;
495
0
    }
496
166k
    case Type::DCT8X4: {
497
499k
      for (size_t x = 0; x < 2; x++) {
498
333k
        HWY_ALIGN float block[4 * 8];
499
333k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
333k
                                 scratch_space);
501
1.66M
        for (size_t iy = 0; iy < 4; iy++) {
502
11.9M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
10.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
10.6M
          }
506
1.33M
        }
507
333k
      }
508
166k
      float block0 = coefficients[0];
509
166k
      float block1 = coefficients[8];
510
166k
      coefficients[0] = (block0 + block1) * 0.5f;
511
166k
      coefficients[8] = (block0 - block1) * 0.5f;
512
166k
      break;
513
0
    }
514
77.3k
    case Type::DCT4X8: {
515
232k
      for (size_t y = 0; y < 2; y++) {
516
154k
        HWY_ALIGN float block[4 * 8];
517
154k
        ComputeScaledDCT<4, 8>()(
518
154k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
154k
            scratch_space);
520
773k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.56M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.95M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.95M
          }
524
618k
        }
525
154k
      }
526
77.3k
      float block0 = coefficients[0];
527
77.3k
      float block1 = coefficients[8];
528
77.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
77.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
77.3k
      break;
531
0
    }
532
321
    case Type::DCT4X4: {
533
963
      for (size_t y = 0; y < 2; y++) {
534
1.92k
        for (size_t x = 0; x < 2; x++) {
535
1.28k
          HWY_ALIGN float block[4 * 4];
536
1.28k
          ComputeScaledDCT<4, 4>()(
537
1.28k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.28k
              block, scratch_space);
539
6.42k
          for (size_t iy = 0; iy < 4; iy++) {
540
25.6k
            for (size_t ix = 0; ix < 4; ix++) {
541
20.5k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
20.5k
            }
543
5.13k
          }
544
1.28k
        }
545
642
      }
546
321
      float block00 = coefficients[0];
547
321
      float block01 = coefficients[1];
548
321
      float block10 = coefficients[8];
549
321
      float block11 = coefficients[9];
550
321
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
321
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
321
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
321
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
321
      break;
555
0
    }
556
2.27M
    case Type::DCT2X2: {
557
2.27M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.27M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.27M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.27M
      break;
561
0
    }
562
184k
    case Type::DCT16X16: {
563
184k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
184k
                                 scratch_space);
565
184k
      break;
566
0
    }
567
250k
    case Type::DCT16X8: {
568
250k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
250k
                                scratch_space);
570
250k
      break;
571
0
    }
572
265k
    case Type::DCT8X16: {
573
265k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
265k
                                scratch_space);
575
265k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
58.8k
    case Type::DCT32X16: {
588
58.8k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
58.8k
                                 scratch_space);
590
58.8k
      break;
591
0
    }
592
61.2k
    case Type::DCT16X32: {
593
61.2k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
61.2k
                                 scratch_space);
595
61.2k
      break;
596
0
    }
597
110k
    case Type::DCT32X32: {
598
110k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
110k
                                 scratch_space);
600
110k
      break;
601
0
    }
602
1.80M
    case Type::DCT: {
603
1.80M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.80M
                               scratch_space);
605
1.80M
      break;
606
0
    }
607
106k
    case Type::AFV0: {
608
106k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
106k
      break;
610
0
    }
611
62.9k
    case Type::AFV1: {
612
62.9k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
62.9k
      break;
614
0
    }
615
77.1k
    case Type::AFV2: {
616
77.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
77.1k
      break;
618
0
    }
619
84.0k
    case Type::AFV3: {
620
84.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
84.0k
      break;
622
0
    }
623
63.6k
    case Type::DCT64X64: {
624
63.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
63.6k
                                 scratch_space);
626
63.6k
      break;
627
0
    }
628
12.7k
    case Type::DCT64X32: {
629
12.7k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.7k
                                 scratch_space);
631
12.7k
      break;
632
0
    }
633
6.81k
    case Type::DCT32X64: {
634
6.81k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.81k
                                 scratch_space);
636
6.81k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
6.42M
  }
669
6.42M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
21.0M
                                          float* JXL_RESTRICT scratch_space) {
462
21.0M
  using Type = AcStrategyType;
463
21.0M
  switch (strategy) {
464
752k
    case Type::IDENTITY: {
465
2.25M
      for (size_t y = 0; y < 2; y++) {
466
4.51M
        for (size_t x = 0; x < 2; x++) {
467
3.01M
          float block_dc = 0;
468
15.0M
          for (size_t iy = 0; iy < 4; iy++) {
469
60.2M
            for (size_t ix = 0; ix < 4; ix++) {
470
48.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
48.1M
            }
472
12.0M
          }
473
3.01M
          block_dc *= 1.0f / 16;
474
15.0M
          for (size_t iy = 0; iy < 4; iy++) {
475
60.2M
            for (size_t ix = 0; ix < 4; ix++) {
476
48.1M
              if (ix == 1 && iy == 1) continue;
477
45.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
45.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
45.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
45.1M
            }
481
12.0M
          }
482
3.01M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.01M
          coefficients[y * 8 + x] = block_dc;
484
3.01M
        }
485
1.50M
      }
486
752k
      float block00 = coefficients[0];
487
752k
      float block01 = coefficients[1];
488
752k
      float block10 = coefficients[8];
489
752k
      float block11 = coefficients[9];
490
752k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
752k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
752k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
752k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
752k
      break;
495
0
    }
496
166k
    case Type::DCT8X4: {
497
499k
      for (size_t x = 0; x < 2; x++) {
498
333k
        HWY_ALIGN float block[4 * 8];
499
333k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
333k
                                 scratch_space);
501
1.66M
        for (size_t iy = 0; iy < 4; iy++) {
502
11.9M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
10.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
10.6M
          }
506
1.33M
        }
507
333k
      }
508
166k
      float block0 = coefficients[0];
509
166k
      float block1 = coefficients[8];
510
166k
      coefficients[0] = (block0 + block1) * 0.5f;
511
166k
      coefficients[8] = (block0 - block1) * 0.5f;
512
166k
      break;
513
0
    }
514
77.3k
    case Type::DCT4X8: {
515
232k
      for (size_t y = 0; y < 2; y++) {
516
154k
        HWY_ALIGN float block[4 * 8];
517
154k
        ComputeScaledDCT<4, 8>()(
518
154k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
154k
            scratch_space);
520
773k
        for (size_t iy = 0; iy < 4; iy++) {
521
5.56M
          for (size_t ix = 0; ix < 8; ix++) {
522
4.95M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
4.95M
          }
524
618k
        }
525
154k
      }
526
77.3k
      float block0 = coefficients[0];
527
77.3k
      float block1 = coefficients[8];
528
77.3k
      coefficients[0] = (block0 + block1) * 0.5f;
529
77.3k
      coefficients[8] = (block0 - block1) * 0.5f;
530
77.3k
      break;
531
0
    }
532
321
    case Type::DCT4X4: {
533
963
      for (size_t y = 0; y < 2; y++) {
534
1.92k
        for (size_t x = 0; x < 2; x++) {
535
1.28k
          HWY_ALIGN float block[4 * 4];
536
1.28k
          ComputeScaledDCT<4, 4>()(
537
1.28k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
1.28k
              block, scratch_space);
539
6.42k
          for (size_t iy = 0; iy < 4; iy++) {
540
25.6k
            for (size_t ix = 0; ix < 4; ix++) {
541
20.5k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
20.5k
            }
543
5.13k
          }
544
1.28k
        }
545
642
      }
546
321
      float block00 = coefficients[0];
547
321
      float block01 = coefficients[1];
548
321
      float block10 = coefficients[8];
549
321
      float block11 = coefficients[9];
550
321
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
321
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
321
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
321
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
321
      break;
555
0
    }
556
2.27M
    case Type::DCT2X2: {
557
2.27M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
2.27M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
2.27M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
2.27M
      break;
561
0
    }
562
184k
    case Type::DCT16X16: {
563
184k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
184k
                                 scratch_space);
565
184k
      break;
566
0
    }
567
250k
    case Type::DCT16X8: {
568
250k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
250k
                                scratch_space);
570
250k
      break;
571
0
    }
572
265k
    case Type::DCT8X16: {
573
265k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
265k
                                scratch_space);
575
265k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
58.8k
    case Type::DCT32X16: {
588
58.8k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
58.8k
                                 scratch_space);
590
58.8k
      break;
591
0
    }
592
61.2k
    case Type::DCT16X32: {
593
61.2k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
61.2k
                                 scratch_space);
595
61.2k
      break;
596
0
    }
597
110k
    case Type::DCT32X32: {
598
110k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
110k
                                 scratch_space);
600
110k
      break;
601
0
    }
602
16.4M
    case Type::DCT: {
603
16.4M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
16.4M
                               scratch_space);
605
16.4M
      break;
606
0
    }
607
106k
    case Type::AFV0: {
608
106k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
106k
      break;
610
0
    }
611
62.9k
    case Type::AFV1: {
612
62.9k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
62.9k
      break;
614
0
    }
615
77.1k
    case Type::AFV2: {
616
77.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
77.1k
      break;
618
0
    }
619
84.0k
    case Type::AFV3: {
620
84.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
84.0k
      break;
622
0
    }
623
63.6k
    case Type::DCT64X64: {
624
63.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
63.6k
                                 scratch_space);
626
63.6k
      break;
627
0
    }
628
12.7k
    case Type::DCT64X32: {
629
12.7k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
12.7k
                                 scratch_space);
631
12.7k
      break;
632
0
    }
633
6.81k
    case Type::DCT32X64: {
634
6.81k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
6.81k
                                 scratch_space);
636
6.81k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
21.0M
  }
669
21.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
182M
                                          float* JXL_RESTRICT scratch_space) {
462
182M
  using Type = AcStrategyType;
463
182M
  switch (strategy) {
464
14.6M
    case Type::IDENTITY: {
465
43.8M
      for (size_t y = 0; y < 2; y++) {
466
87.6M
        for (size_t x = 0; x < 2; x++) {
467
58.4M
          float block_dc = 0;
468
292M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
470
934M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
934M
            }
472
233M
          }
473
58.4M
          block_dc *= 1.0f / 16;
474
292M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
476
934M
              if (ix == 1 && iy == 1) continue;
477
876M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
876M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
876M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
876M
            }
481
233M
          }
482
58.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
58.4M
          coefficients[y * 8 + x] = block_dc;
484
58.4M
        }
485
29.2M
      }
486
14.6M
      float block00 = coefficients[0];
487
14.6M
      float block01 = coefficients[1];
488
14.6M
      float block10 = coefficients[8];
489
14.6M
      float block11 = coefficients[9];
490
14.6M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
14.6M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
14.6M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
14.6M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
14.6M
      break;
495
0
    }
496
14.6M
    case Type::DCT8X4: {
497
43.8M
      for (size_t x = 0; x < 2; x++) {
498
29.2M
        HWY_ALIGN float block[4 * 8];
499
29.2M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
29.2M
                                 scratch_space);
501
146M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.05G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
934M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
934M
          }
506
116M
        }
507
29.2M
      }
508
14.6M
      float block0 = coefficients[0];
509
14.6M
      float block1 = coefficients[8];
510
14.6M
      coefficients[0] = (block0 + block1) * 0.5f;
511
14.6M
      coefficients[8] = (block0 - block1) * 0.5f;
512
14.6M
      break;
513
0
    }
514
14.6M
    case Type::DCT4X8: {
515
43.8M
      for (size_t y = 0; y < 2; y++) {
516
29.2M
        HWY_ALIGN float block[4 * 8];
517
29.2M
        ComputeScaledDCT<4, 8>()(
518
29.2M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
29.2M
            scratch_space);
520
146M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.05G
          for (size_t ix = 0; ix < 8; ix++) {
522
934M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
934M
          }
524
116M
        }
525
29.2M
      }
526
14.6M
      float block0 = coefficients[0];
527
14.6M
      float block1 = coefficients[8];
528
14.6M
      coefficients[0] = (block0 + block1) * 0.5f;
529
14.6M
      coefficients[8] = (block0 - block1) * 0.5f;
530
14.6M
      break;
531
0
    }
532
14.6M
    case Type::DCT4X4: {
533
43.8M
      for (size_t y = 0; y < 2; y++) {
534
87.6M
        for (size_t x = 0; x < 2; x++) {
535
58.4M
          HWY_ALIGN float block[4 * 4];
536
58.4M
          ComputeScaledDCT<4, 4>()(
537
58.4M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
58.4M
              block, scratch_space);
539
292M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.16G
            for (size_t ix = 0; ix < 4; ix++) {
541
934M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
934M
            }
543
233M
          }
544
58.4M
        }
545
29.2M
      }
546
14.6M
      float block00 = coefficients[0];
547
14.6M
      float block01 = coefficients[1];
548
14.6M
      float block10 = coefficients[8];
549
14.6M
      float block11 = coefficients[9];
550
14.6M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
14.6M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
14.6M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
14.6M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
14.6M
      break;
555
0
    }
556
14.6M
    case Type::DCT2X2: {
557
14.6M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
14.6M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
14.6M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
14.6M
      break;
561
0
    }
562
5.83M
    case Type::DCT16X16: {
563
5.83M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
5.83M
                                 scratch_space);
565
5.83M
      break;
566
0
    }
567
11.5M
    case Type::DCT16X8: {
568
11.5M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
11.5M
                                scratch_space);
570
11.5M
      break;
571
0
    }
572
11.5M
    case Type::DCT8X16: {
573
11.5M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
11.5M
                                scratch_space);
575
11.5M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.30M
    case Type::DCT32X16: {
588
2.30M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.30M
                                 scratch_space);
590
2.30M
      break;
591
0
    }
592
2.28M
    case Type::DCT16X32: {
593
2.28M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.28M
                                 scratch_space);
595
2.28M
      break;
596
0
    }
597
1.16M
    case Type::DCT32X32: {
598
1.16M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.16M
                                 scratch_space);
600
1.16M
      break;
601
0
    }
602
14.6M
    case Type::DCT: {
603
14.6M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
14.6M
                               scratch_space);
605
14.6M
      break;
606
0
    }
607
14.6M
    case Type::AFV0: {
608
14.6M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
14.6M
      break;
610
0
    }
611
14.6M
    case Type::AFV1: {
612
14.6M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.6M
      break;
614
0
    }
615
14.6M
    case Type::AFV2: {
616
14.6M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
14.6M
      break;
618
0
    }
619
14.6M
    case Type::AFV3: {
620
14.6M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.6M
      break;
622
0
    }
623
202k
    case Type::DCT64X64: {
624
202k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
202k
                                 scratch_space);
626
202k
      break;
627
0
    }
628
700k
    case Type::DCT64X32: {
629
700k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
700k
                                 scratch_space);
631
700k
      break;
632
0
    }
633
424k
    case Type::DCT32X64: {
634
424k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
424k
                                 scratch_space);
636
424k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
182M
  }
669
182M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
27.4M
                                              float* scratch_space) {
676
27.4M
  using Type = AcStrategyType;
677
27.4M
  switch (strategy) {
678
501k
    case Type::DCT16X8: {
679
501k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
501k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
501k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
501k
      break;
683
0
    }
684
531k
    case Type::DCT8X16: {
685
531k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
531k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
531k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
531k
      break;
689
0
    }
690
369k
    case Type::DCT16X16: {
691
369k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
369k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
369k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
369k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
117k
    case Type::DCT32X16: {
709
117k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
117k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
117k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
117k
      break;
713
0
    }
714
122k
    case Type::DCT16X32: {
715
122k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
122k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
122k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
122k
      break;
719
0
    }
720
220k
    case Type::DCT32X32: {
721
220k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
220k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
220k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
220k
      break;
725
0
    }
726
25.5k
    case Type::DCT64X32: {
727
25.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
25.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
25.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
25.5k
      break;
731
0
    }
732
13.6k
    case Type::DCT32X64: {
733
13.6k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
13.6k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
13.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
13.6k
      break;
737
0
    }
738
127k
    case Type::DCT64X64: {
739
127k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
127k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
127k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
127k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
18.2M
    case Type::DCT:
787
22.7M
    case Type::DCT2X2:
788
22.7M
    case Type::DCT4X4:
789
22.9M
    case Type::DCT4X8:
790
23.2M
    case Type::DCT8X4:
791
23.4M
    case Type::AFV0:
792
23.6M
    case Type::AFV1:
793
23.7M
    case Type::AFV2:
794
23.9M
    case Type::AFV3:
795
25.4M
    case Type::IDENTITY:
796
25.4M
      dc[0] = block[0];
797
25.4M
      break;
798
27.4M
  }
799
27.4M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
6.42M
                                              float* scratch_space) {
676
6.42M
  using Type = AcStrategyType;
677
6.42M
  switch (strategy) {
678
250k
    case Type::DCT16X8: {
679
250k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
250k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
250k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
250k
      break;
683
0
    }
684
265k
    case Type::DCT8X16: {
685
265k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
265k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
265k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
265k
      break;
689
0
    }
690
184k
    case Type::DCT16X16: {
691
184k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
184k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
184k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
184k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
58.8k
    case Type::DCT32X16: {
709
58.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
58.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
58.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
58.8k
      break;
713
0
    }
714
61.2k
    case Type::DCT16X32: {
715
61.2k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
61.2k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
61.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
61.2k
      break;
719
0
    }
720
110k
    case Type::DCT32X32: {
721
110k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
110k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
110k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
110k
      break;
725
0
    }
726
12.7k
    case Type::DCT64X32: {
727
12.7k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.7k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.7k
      break;
731
0
    }
732
6.81k
    case Type::DCT32X64: {
733
6.81k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.81k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.81k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.81k
      break;
737
0
    }
738
63.6k
    case Type::DCT64X64: {
739
63.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
63.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
63.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
63.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.80M
    case Type::DCT:
787
4.08M
    case Type::DCT2X2:
788
4.08M
    case Type::DCT4X4:
789
4.16M
    case Type::DCT4X8:
790
4.32M
    case Type::DCT8X4:
791
4.43M
    case Type::AFV0:
792
4.49M
    case Type::AFV1:
793
4.57M
    case Type::AFV2:
794
4.65M
    case Type::AFV3:
795
5.41M
    case Type::IDENTITY:
796
5.41M
      dc[0] = block[0];
797
5.41M
      break;
798
6.42M
  }
799
6.42M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
21.0M
                                              float* scratch_space) {
676
21.0M
  using Type = AcStrategyType;
677
21.0M
  switch (strategy) {
678
250k
    case Type::DCT16X8: {
679
250k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
250k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
250k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
250k
      break;
683
0
    }
684
265k
    case Type::DCT8X16: {
685
265k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
265k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
265k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
265k
      break;
689
0
    }
690
184k
    case Type::DCT16X16: {
691
184k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
184k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
184k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
184k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
58.8k
    case Type::DCT32X16: {
709
58.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
58.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
58.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
58.8k
      break;
713
0
    }
714
61.2k
    case Type::DCT16X32: {
715
61.2k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
61.2k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
61.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
61.2k
      break;
719
0
    }
720
110k
    case Type::DCT32X32: {
721
110k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
110k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
110k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
110k
      break;
725
0
    }
726
12.7k
    case Type::DCT64X32: {
727
12.7k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.7k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.7k
      break;
731
0
    }
732
6.81k
    case Type::DCT32X64: {
733
6.81k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
6.81k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
6.81k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
6.81k
      break;
737
0
    }
738
63.6k
    case Type::DCT64X64: {
739
63.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
63.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
63.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
63.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
16.4M
    case Type::DCT:
787
18.6M
    case Type::DCT2X2:
788
18.6M
    case Type::DCT4X4:
789
18.7M
    case Type::DCT4X8:
790
18.9M
    case Type::DCT8X4:
791
19.0M
    case Type::AFV0:
792
19.1M
    case Type::AFV1:
793
19.1M
    case Type::AFV2:
794
19.2M
    case Type::AFV3:
795
20.0M
    case Type::IDENTITY:
796
20.0M
      dc[0] = block[0];
797
20.0M
      break;
798
21.0M
  }
799
21.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_