Coverage Report

Created: 2025-10-12 07:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.56M
                                   const size_t output_stride, float* scratch) {
40
1.56M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.56M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.56M
  float* block = scratch;
43
1.56M
  if (ROWS < COLS) {
44
1.20M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
2.61M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.95M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.95M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.95M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.95M
      }
50
662k
    }
51
1.02M
  } else {
52
3.49M
    for (size_t y = 0; y < LF_COLS; y++) {
53
13.4M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
10.9M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
10.9M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
10.9M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
10.9M
      }
58
2.46M
    }
59
1.02M
  }
60
61
1.56M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.56M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.56M
                                  scratch_space);
64
1.56M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
203k
                                   const size_t output_stride, float* scratch) {
40
203k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
203k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
203k
  float* block = scratch;
43
203k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
203k
  } else {
52
407k
    for (size_t y = 0; y < LF_COLS; y++) {
53
611k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
407k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
407k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
407k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
407k
      }
58
203k
    }
59
203k
  }
60
61
203k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
203k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
203k
                                  scratch_space);
64
203k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
218k
                                   const size_t output_stride, float* scratch) {
40
218k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
218k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
218k
  float* block = scratch;
43
218k
  if (ROWS < COLS) {
44
437k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
656k
      for (size_t x = 0; x < LF_COLS; x++) {
46
437k
        block[y * COLS + x] = input[y * input_stride + x] *
47
437k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
437k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
437k
      }
50
218k
    }
51
218k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
218k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
218k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
218k
                                  scratch_space);
64
218k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
140k
                                   const size_t output_stride, float* scratch) {
40
140k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
140k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
140k
  float* block = scratch;
43
140k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
140k
  } else {
52
422k
    for (size_t y = 0; y < LF_COLS; y++) {
53
845k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
563k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
563k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
563k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
563k
      }
58
281k
    }
59
140k
  }
60
61
140k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
140k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
140k
                                  scratch_space);
64
140k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.9k
                                   const size_t output_stride, float* scratch) {
40
45.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.9k
  float* block = scratch;
43
45.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.9k
  } else {
52
137k
    for (size_t y = 0; y < LF_COLS; y++) {
53
459k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
367k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
367k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
367k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
367k
      }
58
91.8k
    }
59
45.9k
  }
60
61
45.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.9k
                                  scratch_space);
64
45.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.7k
                                   const size_t output_stride, float* scratch) {
40
44.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.7k
  float* block = scratch;
43
44.7k
  if (ROWS < COLS) {
44
134k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
447k
      for (size_t x = 0; x < LF_COLS; x++) {
46
358k
        block[y * COLS + x] = input[y * input_stride + x] *
47
358k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
358k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
358k
      }
50
89.5k
    }
51
44.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
44.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.7k
                                  scratch_space);
64
44.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
69.7k
                                   const size_t output_stride, float* scratch) {
40
69.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
69.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
69.7k
  float* block = scratch;
43
69.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
69.7k
  } else {
52
348k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.39M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.11M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.11M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.11M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.11M
      }
58
278k
    }
59
69.7k
  }
60
61
69.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
69.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
69.7k
                                  scratch_space);
64
69.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
13.9k
                                   const size_t output_stride, float* scratch) {
40
13.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
13.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
13.9k
  float* block = scratch;
43
13.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
13.9k
  } else {
52
69.5k
    for (size_t y = 0; y < LF_COLS; y++) {
53
500k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
445k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
445k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
445k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
445k
      }
58
55.6k
    }
59
13.9k
  }
60
61
13.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
13.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
13.9k
                                  scratch_space);
64
13.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.66k
                                   const size_t output_stride, float* scratch) {
40
5.66k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.66k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.66k
  float* block = scratch;
43
5.66k
  if (ROWS < COLS) {
44
28.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
203k
      for (size_t x = 0; x < LF_COLS; x++) {
46
181k
        block[y * COLS + x] = input[y * input_stride + x] *
47
181k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
181k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
181k
      }
50
22.6k
    }
51
5.66k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
5.66k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.66k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.66k
                                  scratch_space);
64
5.66k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
40.1k
                                   const size_t output_stride, float* scratch) {
40
40.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
40.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
40.1k
  float* block = scratch;
43
40.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
40.1k
  } else {
52
361k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.89M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.57M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.57M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.57M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.57M
      }
58
321k
    }
59
40.1k
  }
60
61
40.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
40.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
40.1k
                                  scratch_space);
64
40.1k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
203k
                                   const size_t output_stride, float* scratch) {
40
203k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
203k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
203k
  float* block = scratch;
43
203k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
203k
  } else {
52
407k
    for (size_t y = 0; y < LF_COLS; y++) {
53
611k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
407k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
407k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
407k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
407k
      }
58
203k
    }
59
203k
  }
60
61
203k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
203k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
203k
                                  scratch_space);
64
203k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
218k
                                   const size_t output_stride, float* scratch) {
40
218k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
218k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
218k
  float* block = scratch;
43
218k
  if (ROWS < COLS) {
44
437k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
656k
      for (size_t x = 0; x < LF_COLS; x++) {
46
437k
        block[y * COLS + x] = input[y * input_stride + x] *
47
437k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
437k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
437k
      }
50
218k
    }
51
218k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
218k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
218k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
218k
                                  scratch_space);
64
218k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
140k
                                   const size_t output_stride, float* scratch) {
40
140k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
140k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
140k
  float* block = scratch;
43
140k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
140k
  } else {
52
422k
    for (size_t y = 0; y < LF_COLS; y++) {
53
845k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
563k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
563k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
563k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
563k
      }
58
281k
    }
59
140k
  }
60
61
140k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
140k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
140k
                                  scratch_space);
64
140k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.9k
                                   const size_t output_stride, float* scratch) {
40
45.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.9k
  float* block = scratch;
43
45.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.9k
  } else {
52
137k
    for (size_t y = 0; y < LF_COLS; y++) {
53
459k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
367k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
367k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
367k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
367k
      }
58
91.8k
    }
59
45.9k
  }
60
61
45.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.9k
                                  scratch_space);
64
45.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.7k
                                   const size_t output_stride, float* scratch) {
40
44.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.7k
  float* block = scratch;
43
44.7k
  if (ROWS < COLS) {
44
134k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
447k
      for (size_t x = 0; x < LF_COLS; x++) {
46
358k
        block[y * COLS + x] = input[y * input_stride + x] *
47
358k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
358k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
358k
      }
50
89.5k
    }
51
44.7k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
44.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.7k
                                  scratch_space);
64
44.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
69.7k
                                   const size_t output_stride, float* scratch) {
40
69.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
69.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
69.7k
  float* block = scratch;
43
69.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
69.7k
  } else {
52
348k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.39M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.11M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.11M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.11M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.11M
      }
58
278k
    }
59
69.7k
  }
60
61
69.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
69.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
69.7k
                                  scratch_space);
64
69.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
13.9k
                                   const size_t output_stride, float* scratch) {
40
13.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
13.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
13.9k
  float* block = scratch;
43
13.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
13.9k
  } else {
52
69.5k
    for (size_t y = 0; y < LF_COLS; y++) {
53
500k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
445k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
445k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
445k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
445k
      }
58
55.6k
    }
59
13.9k
  }
60
61
13.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
13.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
13.9k
                                  scratch_space);
64
13.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.66k
                                   const size_t output_stride, float* scratch) {
40
5.66k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.66k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.66k
  float* block = scratch;
43
5.66k
  if (ROWS < COLS) {
44
28.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
203k
      for (size_t x = 0; x < LF_COLS; x++) {
46
181k
        block[y * COLS + x] = input[y * input_stride + x] *
47
181k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
181k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
181k
      }
50
22.6k
    }
51
5.66k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
5.66k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.66k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.66k
                                  scratch_space);
64
5.66k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
40.1k
                                   const size_t output_stride, float* scratch) {
40
40.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
40.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
40.1k
  float* block = scratch;
43
40.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
40.1k
  } else {
52
361k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.89M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.57M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.57M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.57M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.57M
      }
58
321k
    }
59
40.1k
  }
60
61
40.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
40.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
40.1k
                                  scratch_space);
64
40.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
43.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
43.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
43.9M
  static_assert(S % 2 == 0, "S should be even");
70
43.9M
  float temp[kDCTBlockSize];
71
43.9M
  constexpr size_t num_2x2 = S / 2;
72
146M
  for (size_t y = 0; y < num_2x2; y++) {
73
410M
    for (size_t x = 0; x < num_2x2; x++) {
74
307M
      float c00 = block[y * 2 * stride + x * 2];
75
307M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
307M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
307M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
307M
      float r00 = c00 + c01 + c10 + c11;
79
307M
      float r01 = c00 + c01 - c10 - c11;
80
307M
      float r10 = c00 - c01 + c10 - c11;
81
307M
      float r11 = c00 - c01 - c10 + c11;
82
307M
      r00 *= 0.25f;
83
307M
      r01 *= 0.25f;
84
307M
      r10 *= 0.25f;
85
307M
      r11 *= 0.25f;
86
307M
      temp[y * kBlockDim + x] = r00;
87
307M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
307M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
307M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
307M
    }
91
102M
  }
92
249M
  for (size_t y = 0; y < S; y++) {
93
1.43G
    for (size_t x = 0; x < S; x++) {
94
1.23G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.23G
    }
96
205M
  }
97
43.9M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
9.59M
  for (size_t y = 0; y < num_2x2; y++) {
73
38.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
30.6M
      float c00 = block[y * 2 * stride + x * 2];
75
30.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
30.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
30.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
30.6M
      float r00 = c00 + c01 + c10 + c11;
79
30.6M
      float r01 = c00 + c01 - c10 - c11;
80
30.6M
      float r10 = c00 - c01 + c10 - c11;
81
30.6M
      float r11 = c00 - c01 - c10 + c11;
82
30.6M
      r00 *= 0.25f;
83
30.6M
      r01 *= 0.25f;
84
30.6M
      r10 *= 0.25f;
85
30.6M
      r11 *= 0.25f;
86
30.6M
      temp[y * kBlockDim + x] = r00;
87
30.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
30.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
30.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
30.6M
    }
91
7.67M
  }
92
17.2M
  for (size_t y = 0; y < S; y++) {
93
138M
    for (size_t x = 0; x < S; x++) {
94
122M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
122M
    }
96
15.3M
  }
97
1.91M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
5.75M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
7.67M
      float c00 = block[y * 2 * stride + x * 2];
75
7.67M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
7.67M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
7.67M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
7.67M
      float r00 = c00 + c01 + c10 + c11;
79
7.67M
      float r01 = c00 + c01 - c10 - c11;
80
7.67M
      float r10 = c00 - c01 + c10 - c11;
81
7.67M
      float r11 = c00 - c01 - c10 + c11;
82
7.67M
      r00 *= 0.25f;
83
7.67M
      r01 *= 0.25f;
84
7.67M
      r10 *= 0.25f;
85
7.67M
      r11 *= 0.25f;
86
7.67M
      temp[y * kBlockDim + x] = r00;
87
7.67M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
7.67M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
7.67M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
7.67M
    }
91
3.83M
  }
92
9.59M
  for (size_t y = 0; y < S; y++) {
93
38.3M
    for (size_t x = 0; x < S; x++) {
94
30.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
30.6M
    }
96
7.67M
  }
97
1.91M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
3.83M
  for (size_t y = 0; y < num_2x2; y++) {
73
3.83M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.91M
      float c00 = block[y * 2 * stride + x * 2];
75
1.91M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.91M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.91M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.91M
      float r00 = c00 + c01 + c10 + c11;
79
1.91M
      float r01 = c00 + c01 - c10 - c11;
80
1.91M
      float r10 = c00 - c01 + c10 - c11;
81
1.91M
      float r11 = c00 - c01 - c10 + c11;
82
1.91M
      r00 *= 0.25f;
83
1.91M
      r01 *= 0.25f;
84
1.91M
      r10 *= 0.25f;
85
1.91M
      r11 *= 0.25f;
86
1.91M
      temp[y * kBlockDim + x] = r00;
87
1.91M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.91M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.91M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.91M
    }
91
1.91M
  }
92
5.75M
  for (size_t y = 0; y < S; y++) {
93
11.5M
    for (size_t x = 0; x < S; x++) {
94
7.67M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
7.67M
    }
96
3.83M
  }
97
1.91M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
9.59M
  for (size_t y = 0; y < num_2x2; y++) {
73
38.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
30.6M
      float c00 = block[y * 2 * stride + x * 2];
75
30.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
30.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
30.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
30.6M
      float r00 = c00 + c01 + c10 + c11;
79
30.6M
      float r01 = c00 + c01 - c10 - c11;
80
30.6M
      float r10 = c00 - c01 + c10 - c11;
81
30.6M
      float r11 = c00 - c01 - c10 + c11;
82
30.6M
      r00 *= 0.25f;
83
30.6M
      r01 *= 0.25f;
84
30.6M
      r10 *= 0.25f;
85
30.6M
      r11 *= 0.25f;
86
30.6M
      temp[y * kBlockDim + x] = r00;
87
30.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
30.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
30.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
30.6M
    }
91
7.67M
  }
92
17.2M
  for (size_t y = 0; y < S; y++) {
93
138M
    for (size_t x = 0; x < S; x++) {
94
122M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
122M
    }
96
15.3M
  }
97
1.91M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
5.75M
  for (size_t y = 0; y < num_2x2; y++) {
73
11.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
7.67M
      float c00 = block[y * 2 * stride + x * 2];
75
7.67M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
7.67M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
7.67M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
7.67M
      float r00 = c00 + c01 + c10 + c11;
79
7.67M
      float r01 = c00 + c01 - c10 - c11;
80
7.67M
      float r10 = c00 - c01 + c10 - c11;
81
7.67M
      float r11 = c00 - c01 - c10 + c11;
82
7.67M
      r00 *= 0.25f;
83
7.67M
      r01 *= 0.25f;
84
7.67M
      r10 *= 0.25f;
85
7.67M
      r11 *= 0.25f;
86
7.67M
      temp[y * kBlockDim + x] = r00;
87
7.67M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
7.67M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
7.67M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
7.67M
    }
91
3.83M
  }
92
9.59M
  for (size_t y = 0; y < S; y++) {
93
38.3M
    for (size_t x = 0; x < S; x++) {
94
30.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
30.6M
    }
96
7.67M
  }
97
1.91M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.91M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.91M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.91M
  static_assert(S % 2 == 0, "S should be even");
70
1.91M
  float temp[kDCTBlockSize];
71
1.91M
  constexpr size_t num_2x2 = S / 2;
72
3.83M
  for (size_t y = 0; y < num_2x2; y++) {
73
3.83M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.91M
      float c00 = block[y * 2 * stride + x * 2];
75
1.91M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.91M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.91M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.91M
      float r00 = c00 + c01 + c10 + c11;
79
1.91M
      float r01 = c00 + c01 - c10 - c11;
80
1.91M
      float r10 = c00 - c01 + c10 - c11;
81
1.91M
      float r11 = c00 - c01 - c10 + c11;
82
1.91M
      r00 *= 0.25f;
83
1.91M
      r01 *= 0.25f;
84
1.91M
      r10 *= 0.25f;
85
1.91M
      r11 *= 0.25f;
86
1.91M
      temp[y * kBlockDim + x] = r00;
87
1.91M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.91M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.91M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.91M
    }
91
1.91M
  }
92
5.75M
  for (size_t y = 0; y < S; y++) {
93
11.5M
    for (size_t x = 0; x < S; x++) {
94
7.67M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
7.67M
    }
96
3.83M
  }
97
1.91M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
54.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
216M
    for (size_t x = 0; x < num_2x2; x++) {
74
173M
      float c00 = block[y * 2 * stride + x * 2];
75
173M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
173M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
173M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
173M
      float r00 = c00 + c01 + c10 + c11;
79
173M
      float r01 = c00 + c01 - c10 - c11;
80
173M
      float r10 = c00 - c01 + c10 - c11;
81
173M
      float r11 = c00 - c01 - c10 + c11;
82
173M
      r00 *= 0.25f;
83
173M
      r01 *= 0.25f;
84
173M
      r10 *= 0.25f;
85
173M
      r11 *= 0.25f;
86
173M
      temp[y * kBlockDim + x] = r00;
87
173M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
173M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
173M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
173M
    }
91
43.3M
  }
92
97.4M
  for (size_t y = 0; y < S; y++) {
93
779M
    for (size_t x = 0; x < S; x++) {
94
692M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
692M
    }
96
86.6M
  }
97
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
32.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
64.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
43.3M
      float c00 = block[y * 2 * stride + x * 2];
75
43.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
43.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
43.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
43.3M
      float r00 = c00 + c01 + c10 + c11;
79
43.3M
      float r01 = c00 + c01 - c10 - c11;
80
43.3M
      float r10 = c00 - c01 + c10 - c11;
81
43.3M
      float r11 = c00 - c01 - c10 + c11;
82
43.3M
      r00 *= 0.25f;
83
43.3M
      r01 *= 0.25f;
84
43.3M
      r10 *= 0.25f;
85
43.3M
      r11 *= 0.25f;
86
43.3M
      temp[y * kBlockDim + x] = r00;
87
43.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
43.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
43.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
43.3M
    }
91
21.6M
  }
92
54.1M
  for (size_t y = 0; y < S; y++) {
93
216M
    for (size_t x = 0; x < S; x++) {
94
173M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
173M
    }
96
43.3M
  }
97
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
10.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
10.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
10.8M
  static_assert(S % 2 == 0, "S should be even");
70
10.8M
  float temp[kDCTBlockSize];
71
10.8M
  constexpr size_t num_2x2 = S / 2;
72
21.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
21.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
10.8M
      float c00 = block[y * 2 * stride + x * 2];
75
10.8M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
10.8M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
10.8M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
10.8M
      float r00 = c00 + c01 + c10 + c11;
79
10.8M
      float r01 = c00 + c01 - c10 - c11;
80
10.8M
      float r10 = c00 - c01 + c10 - c11;
81
10.8M
      float r11 = c00 - c01 - c10 + c11;
82
10.8M
      r00 *= 0.25f;
83
10.8M
      r01 *= 0.25f;
84
10.8M
      r10 *= 0.25f;
85
10.8M
      r11 *= 0.25f;
86
10.8M
      temp[y * kBlockDim + x] = r00;
87
10.8M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
10.8M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
10.8M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
10.8M
    }
91
10.8M
  }
92
32.4M
  for (size_t y = 0; y < S; y++) {
93
64.9M
    for (size_t x = 0; x < S; x++) {
94
43.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
43.3M
    }
96
21.6M
  }
97
10.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
43.8M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
43.8M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
43.8M
      {
102
43.8M
          0.2500000000000000,
103
43.8M
          0.8769029297991420f,
104
43.8M
          0.0000000000000000,
105
43.8M
          0.0000000000000000,
106
43.8M
          0.0000000000000000,
107
43.8M
          -0.4105377591765233f,
108
43.8M
          0.0000000000000000,
109
43.8M
          0.0000000000000000,
110
43.8M
          0.0000000000000000,
111
43.8M
          0.0000000000000000,
112
43.8M
          0.0000000000000000,
113
43.8M
          0.0000000000000000,
114
43.8M
          0.0000000000000000,
115
43.8M
          0.0000000000000000,
116
43.8M
          0.0000000000000000,
117
43.8M
          0.0000000000000000,
118
43.8M
      },
119
43.8M
      {
120
43.8M
          0.2500000000000000,
121
43.8M
          0.2206518106944235f,
122
43.8M
          0.0000000000000000,
123
43.8M
          0.0000000000000000,
124
43.8M
          -0.7071067811865474f,
125
43.8M
          0.6235485373547691f,
126
43.8M
          0.0000000000000000,
127
43.8M
          0.0000000000000000,
128
43.8M
          0.0000000000000000,
129
43.8M
          0.0000000000000000,
130
43.8M
          0.0000000000000000,
131
43.8M
          0.0000000000000000,
132
43.8M
          0.0000000000000000,
133
43.8M
          0.0000000000000000,
134
43.8M
          0.0000000000000000,
135
43.8M
          0.0000000000000000,
136
43.8M
      },
137
43.8M
      {
138
43.8M
          0.2500000000000000,
139
43.8M
          -0.1014005039375376f,
140
43.8M
          0.4067007583026075f,
141
43.8M
          -0.2125574805828875f,
142
43.8M
          0.0000000000000000,
143
43.8M
          -0.0643507165794627f,
144
43.8M
          -0.4517556589999482f,
145
43.8M
          -0.3046847507248690f,
146
43.8M
          0.3017929516615495f,
147
43.8M
          0.4082482904638627f,
148
43.8M
          0.1747866975480809f,
149
43.8M
          -0.2110560104933578f,
150
43.8M
          -0.1426608480880726f,
151
43.8M
          -0.1381354035075859f,
152
43.8M
          -0.1743760259965107f,
153
43.8M
          0.1135498731499434f,
154
43.8M
      },
155
43.8M
      {
156
43.8M
          0.2500000000000000,
157
43.8M
          -0.1014005039375375f,
158
43.8M
          0.4444481661973445f,
159
43.8M
          0.3085497062849767f,
160
43.8M
          0.0000000000000000f,
161
43.8M
          -0.0643507165794627f,
162
43.8M
          0.1585450355184006f,
163
43.8M
          0.5112616136591823f,
164
43.8M
          0.2579236279634118f,
165
43.8M
          0.0000000000000000,
166
43.8M
          0.0812611176717539f,
167
43.8M
          0.1856718091610980f,
168
43.8M
          -0.3416446842253372f,
169
43.8M
          0.3302282550303788f,
170
43.8M
          0.0702790691196284f,
171
43.8M
          -0.0741750459581035f,
172
43.8M
      },
173
43.8M
      {
174
43.8M
          0.2500000000000000,
175
43.8M
          0.2206518106944236f,
176
43.8M
          0.0000000000000000,
177
43.8M
          0.0000000000000000,
178
43.8M
          0.7071067811865476f,
179
43.8M
          0.6235485373547694f,
180
43.8M
          0.0000000000000000,
181
43.8M
          0.0000000000000000,
182
43.8M
          0.0000000000000000,
183
43.8M
          0.0000000000000000,
184
43.8M
          0.0000000000000000,
185
43.8M
          0.0000000000000000,
186
43.8M
          0.0000000000000000,
187
43.8M
          0.0000000000000000,
188
43.8M
          0.0000000000000000,
189
43.8M
          0.0000000000000000,
190
43.8M
      },
191
43.8M
      {
192
43.8M
          0.2500000000000000,
193
43.8M
          -0.1014005039375378f,
194
43.8M
          0.0000000000000000,
195
43.8M
          0.4706702258572536f,
196
43.8M
          0.0000000000000000,
197
43.8M
          -0.0643507165794628f,
198
43.8M
          -0.0403851516082220f,
199
43.8M
          0.0000000000000000,
200
43.8M
          0.1627234014286620f,
201
43.8M
          0.0000000000000000,
202
43.8M
          0.0000000000000000,
203
43.8M
          0.0000000000000000,
204
43.8M
          0.7367497537172237f,
205
43.8M
          0.0875511500058708f,
206
43.8M
          -0.2921026642334881f,
207
43.8M
          0.1940289303259434f,
208
43.8M
      },
209
43.8M
      {
210
43.8M
          0.2500000000000000,
211
43.8M
          -0.1014005039375377f,
212
43.8M
          0.1957439937204294f,
213
43.8M
          -0.1621205195722993f,
214
43.8M
          0.0000000000000000,
215
43.8M
          -0.0643507165794628f,
216
43.8M
          0.0074182263792424f,
217
43.8M
          -0.2904801297289980f,
218
43.8M
          0.0952002265347504f,
219
43.8M
          0.0000000000000000,
220
43.8M
          -0.3675398009862027f,
221
43.8M
          0.4921585901373873f,
222
43.8M
          0.2462710772207515f,
223
43.8M
          -0.0794670660590957f,
224
43.8M
          0.3623817333531167f,
225
43.8M
          -0.4351904965232280f,
226
43.8M
      },
227
43.8M
      {
228
43.8M
          0.2500000000000000,
229
43.8M
          -0.1014005039375376f,
230
43.8M
          0.2929100136981264f,
231
43.8M
          0.0000000000000000,
232
43.8M
          0.0000000000000000,
233
43.8M
          -0.0643507165794627f,
234
43.8M
          0.3935103426921017f,
235
43.8M
          -0.0657870154914280f,
236
43.8M
          0.0000000000000000,
237
43.8M
          -0.4082482904638628f,
238
43.8M
          -0.3078822139579090f,
239
43.8M
          -0.3852501370925192f,
240
43.8M
          -0.0857401903551931f,
241
43.8M
          -0.4613374887461511f,
242
43.8M
          0.0000000000000000,
243
43.8M
          0.2191868483885747f,
244
43.8M
      },
245
43.8M
      {
246
43.8M
          0.2500000000000000,
247
43.8M
          -0.1014005039375376f,
248
43.8M
          -0.4067007583026072f,
249
43.8M
          -0.2125574805828705f,
250
43.8M
          0.0000000000000000,
251
43.8M
          -0.0643507165794627f,
252
43.8M
          -0.4517556589999464f,
253
43.8M
          0.3046847507248840f,
254
43.8M
          0.3017929516615503f,
255
43.8M
          -0.4082482904638635f,
256
43.8M
          -0.1747866975480813f,
257
43.8M
          0.2110560104933581f,
258
43.8M
          -0.1426608480880734f,
259
43.8M
          -0.1381354035075829f,
260
43.8M
          -0.1743760259965108f,
261
43.8M
          0.1135498731499426f,
262
43.8M
      },
263
43.8M
      {
264
43.8M
          0.2500000000000000,
265
43.8M
          -0.1014005039375377f,
266
43.8M
          -0.1957439937204287f,
267
43.8M
          -0.1621205195722833f,
268
43.8M
          0.0000000000000000,
269
43.8M
          -0.0643507165794628f,
270
43.8M
          0.0074182263792444f,
271
43.8M
          0.2904801297290076f,
272
43.8M
          0.0952002265347505f,
273
43.8M
          0.0000000000000000,
274
43.8M
          0.3675398009862011f,
275
43.8M
          -0.4921585901373891f,
276
43.8M
          0.2462710772207514f,
277
43.8M
          -0.0794670660591026f,
278
43.8M
          0.3623817333531165f,
279
43.8M
          -0.4351904965232251f,
280
43.8M
      },
281
43.8M
      {
282
43.8M
          0.2500000000000000,
283
43.8M
          -0.1014005039375375f,
284
43.8M
          0.0000000000000000,
285
43.8M
          -0.4706702258572528f,
286
43.8M
          0.0000000000000000,
287
43.8M
          -0.0643507165794627f,
288
43.8M
          0.1107416575309343f,
289
43.8M
          0.0000000000000000,
290
43.8M
          -0.1627234014286617f,
291
43.8M
          0.0000000000000000,
292
43.8M
          0.0000000000000000,
293
43.8M
          0.0000000000000000,
294
43.8M
          0.1488339922711357f,
295
43.8M
          0.4972464710953509f,
296
43.8M
          0.2921026642334879f,
297
43.8M
          0.5550443808910661f,
298
43.8M
      },
299
43.8M
      {
300
43.8M
          0.2500000000000000,
301
43.8M
          -0.1014005039375377f,
302
43.8M
          0.1137907446044809f,
303
43.8M
          -0.1464291867126764f,
304
43.8M
          0.0000000000000000,
305
43.8M
          -0.0643507165794628f,
306
43.8M
          0.0829816309488205f,
307
43.8M
          -0.2388977352334460f,
308
43.8M
          -0.3531238544981630f,
309
43.8M
          -0.4082482904638630f,
310
43.8M
          0.4826689115059883f,
311
43.8M
          0.1741941265991622f,
312
43.8M
          -0.0476868035022925f,
313
43.8M
          0.1253805944856366f,
314
43.8M
          -0.4326608024727445f,
315
43.8M
          -0.2546827712406646f,
316
43.8M
      },
317
43.8M
      {
318
43.8M
          0.2500000000000000,
319
43.8M
          -0.1014005039375377f,
320
43.8M
          -0.4444481661973438f,
321
43.8M
          0.3085497062849487f,
322
43.8M
          0.0000000000000000,
323
43.8M
          -0.0643507165794628f,
324
43.8M
          0.1585450355183970f,
325
43.8M
          -0.5112616136592012f,
326
43.8M
          0.2579236279634129f,
327
43.8M
          0.0000000000000000,
328
43.8M
          -0.0812611176717504f,
329
43.8M
          -0.1856718091610990f,
330
43.8M
          -0.3416446842253373f,
331
43.8M
          0.3302282550303805f,
332
43.8M
          0.0702790691196282f,
333
43.8M
          -0.0741750459581023f,
334
43.8M
      },
335
43.8M
      {
336
43.8M
          0.2500000000000000,
337
43.8M
          -0.1014005039375376f,
338
43.8M
          -0.2929100136981264f,
339
43.8M
          0.0000000000000000,
340
43.8M
          0.0000000000000000,
341
43.8M
          -0.0643507165794627f,
342
43.8M
          0.3935103426921022f,
343
43.8M
          0.0657870154914254f,
344
43.8M
          0.0000000000000000,
345
43.8M
          0.4082482904638634f,
346
43.8M
          0.3078822139579031f,
347
43.8M
          0.3852501370925211f,
348
43.8M
          -0.0857401903551927f,
349
43.8M
          -0.4613374887461554f,
350
43.8M
          0.0000000000000000,
351
43.8M
          0.2191868483885728f,
352
43.8M
      },
353
43.8M
      {
354
43.8M
          0.2500000000000000,
355
43.8M
          -0.1014005039375376f,
356
43.8M
          -0.1137907446044814f,
357
43.8M
          -0.1464291867126654f,
358
43.8M
          0.0000000000000000,
359
43.8M
          -0.0643507165794627f,
360
43.8M
          0.0829816309488214f,
361
43.8M
          0.2388977352334547f,
362
43.8M
          -0.3531238544981624f,
363
43.8M
          0.4082482904638630f,
364
43.8M
          -0.4826689115059858f,
365
43.8M
          -0.1741941265991621f,
366
43.8M
          -0.0476868035022928f,
367
43.8M
          0.1253805944856431f,
368
43.8M
          -0.4326608024727457f,
369
43.8M
          -0.2546827712406641f,
370
43.8M
      },
371
43.8M
      {
372
43.8M
          0.2500000000000000,
373
43.8M
          -0.1014005039375374f,
374
43.8M
          0.0000000000000000,
375
43.8M
          0.4251149611657548f,
376
43.8M
          0.0000000000000000,
377
43.8M
          -0.0643507165794626f,
378
43.8M
          -0.4517556589999480f,
379
43.8M
          0.0000000000000000,
380
43.8M
          -0.6035859033230976f,
381
43.8M
          0.0000000000000000,
382
43.8M
          0.0000000000000000,
383
43.8M
          0.0000000000000000,
384
43.8M
          -0.1426608480880724f,
385
43.8M
          -0.1381354035075845f,
386
43.8M
          0.3487520519930227f,
387
43.8M
          0.1135498731499429f,
388
43.8M
      },
389
43.8M
  };
390
391
43.8M
  const HWY_CAPPED(float, 16) d;
392
131M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
87.7M
    auto scalar = Zero(d);
394
1.49G
    for (size_t j = 0; j < 16; j++) {
395
1.40G
      auto px = Set(d, pixels[j]);
396
1.40G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.40G
      scalar = MulAdd(px, basis, scalar);
398
1.40G
    }
399
87.7M
    Store(scalar, d, coeffs + i);
400
87.7M
  }
401
43.8M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
273k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
273k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
273k
      {
102
273k
          0.2500000000000000,
103
273k
          0.8769029297991420f,
104
273k
          0.0000000000000000,
105
273k
          0.0000000000000000,
106
273k
          0.0000000000000000,
107
273k
          -0.4105377591765233f,
108
273k
          0.0000000000000000,
109
273k
          0.0000000000000000,
110
273k
          0.0000000000000000,
111
273k
          0.0000000000000000,
112
273k
          0.0000000000000000,
113
273k
          0.0000000000000000,
114
273k
          0.0000000000000000,
115
273k
          0.0000000000000000,
116
273k
          0.0000000000000000,
117
273k
          0.0000000000000000,
118
273k
      },
119
273k
      {
120
273k
          0.2500000000000000,
121
273k
          0.2206518106944235f,
122
273k
          0.0000000000000000,
123
273k
          0.0000000000000000,
124
273k
          -0.7071067811865474f,
125
273k
          0.6235485373547691f,
126
273k
          0.0000000000000000,
127
273k
          0.0000000000000000,
128
273k
          0.0000000000000000,
129
273k
          0.0000000000000000,
130
273k
          0.0000000000000000,
131
273k
          0.0000000000000000,
132
273k
          0.0000000000000000,
133
273k
          0.0000000000000000,
134
273k
          0.0000000000000000,
135
273k
          0.0000000000000000,
136
273k
      },
137
273k
      {
138
273k
          0.2500000000000000,
139
273k
          -0.1014005039375376f,
140
273k
          0.4067007583026075f,
141
273k
          -0.2125574805828875f,
142
273k
          0.0000000000000000,
143
273k
          -0.0643507165794627f,
144
273k
          -0.4517556589999482f,
145
273k
          -0.3046847507248690f,
146
273k
          0.3017929516615495f,
147
273k
          0.4082482904638627f,
148
273k
          0.1747866975480809f,
149
273k
          -0.2110560104933578f,
150
273k
          -0.1426608480880726f,
151
273k
          -0.1381354035075859f,
152
273k
          -0.1743760259965107f,
153
273k
          0.1135498731499434f,
154
273k
      },
155
273k
      {
156
273k
          0.2500000000000000,
157
273k
          -0.1014005039375375f,
158
273k
          0.4444481661973445f,
159
273k
          0.3085497062849767f,
160
273k
          0.0000000000000000f,
161
273k
          -0.0643507165794627f,
162
273k
          0.1585450355184006f,
163
273k
          0.5112616136591823f,
164
273k
          0.2579236279634118f,
165
273k
          0.0000000000000000,
166
273k
          0.0812611176717539f,
167
273k
          0.1856718091610980f,
168
273k
          -0.3416446842253372f,
169
273k
          0.3302282550303788f,
170
273k
          0.0702790691196284f,
171
273k
          -0.0741750459581035f,
172
273k
      },
173
273k
      {
174
273k
          0.2500000000000000,
175
273k
          0.2206518106944236f,
176
273k
          0.0000000000000000,
177
273k
          0.0000000000000000,
178
273k
          0.7071067811865476f,
179
273k
          0.6235485373547694f,
180
273k
          0.0000000000000000,
181
273k
          0.0000000000000000,
182
273k
          0.0000000000000000,
183
273k
          0.0000000000000000,
184
273k
          0.0000000000000000,
185
273k
          0.0000000000000000,
186
273k
          0.0000000000000000,
187
273k
          0.0000000000000000,
188
273k
          0.0000000000000000,
189
273k
          0.0000000000000000,
190
273k
      },
191
273k
      {
192
273k
          0.2500000000000000,
193
273k
          -0.1014005039375378f,
194
273k
          0.0000000000000000,
195
273k
          0.4706702258572536f,
196
273k
          0.0000000000000000,
197
273k
          -0.0643507165794628f,
198
273k
          -0.0403851516082220f,
199
273k
          0.0000000000000000,
200
273k
          0.1627234014286620f,
201
273k
          0.0000000000000000,
202
273k
          0.0000000000000000,
203
273k
          0.0000000000000000,
204
273k
          0.7367497537172237f,
205
273k
          0.0875511500058708f,
206
273k
          -0.2921026642334881f,
207
273k
          0.1940289303259434f,
208
273k
      },
209
273k
      {
210
273k
          0.2500000000000000,
211
273k
          -0.1014005039375377f,
212
273k
          0.1957439937204294f,
213
273k
          -0.1621205195722993f,
214
273k
          0.0000000000000000,
215
273k
          -0.0643507165794628f,
216
273k
          0.0074182263792424f,
217
273k
          -0.2904801297289980f,
218
273k
          0.0952002265347504f,
219
273k
          0.0000000000000000,
220
273k
          -0.3675398009862027f,
221
273k
          0.4921585901373873f,
222
273k
          0.2462710772207515f,
223
273k
          -0.0794670660590957f,
224
273k
          0.3623817333531167f,
225
273k
          -0.4351904965232280f,
226
273k
      },
227
273k
      {
228
273k
          0.2500000000000000,
229
273k
          -0.1014005039375376f,
230
273k
          0.2929100136981264f,
231
273k
          0.0000000000000000,
232
273k
          0.0000000000000000,
233
273k
          -0.0643507165794627f,
234
273k
          0.3935103426921017f,
235
273k
          -0.0657870154914280f,
236
273k
          0.0000000000000000,
237
273k
          -0.4082482904638628f,
238
273k
          -0.3078822139579090f,
239
273k
          -0.3852501370925192f,
240
273k
          -0.0857401903551931f,
241
273k
          -0.4613374887461511f,
242
273k
          0.0000000000000000,
243
273k
          0.2191868483885747f,
244
273k
      },
245
273k
      {
246
273k
          0.2500000000000000,
247
273k
          -0.1014005039375376f,
248
273k
          -0.4067007583026072f,
249
273k
          -0.2125574805828705f,
250
273k
          0.0000000000000000,
251
273k
          -0.0643507165794627f,
252
273k
          -0.4517556589999464f,
253
273k
          0.3046847507248840f,
254
273k
          0.3017929516615503f,
255
273k
          -0.4082482904638635f,
256
273k
          -0.1747866975480813f,
257
273k
          0.2110560104933581f,
258
273k
          -0.1426608480880734f,
259
273k
          -0.1381354035075829f,
260
273k
          -0.1743760259965108f,
261
273k
          0.1135498731499426f,
262
273k
      },
263
273k
      {
264
273k
          0.2500000000000000,
265
273k
          -0.1014005039375377f,
266
273k
          -0.1957439937204287f,
267
273k
          -0.1621205195722833f,
268
273k
          0.0000000000000000,
269
273k
          -0.0643507165794628f,
270
273k
          0.0074182263792444f,
271
273k
          0.2904801297290076f,
272
273k
          0.0952002265347505f,
273
273k
          0.0000000000000000,
274
273k
          0.3675398009862011f,
275
273k
          -0.4921585901373891f,
276
273k
          0.2462710772207514f,
277
273k
          -0.0794670660591026f,
278
273k
          0.3623817333531165f,
279
273k
          -0.4351904965232251f,
280
273k
      },
281
273k
      {
282
273k
          0.2500000000000000,
283
273k
          -0.1014005039375375f,
284
273k
          0.0000000000000000,
285
273k
          -0.4706702258572528f,
286
273k
          0.0000000000000000,
287
273k
          -0.0643507165794627f,
288
273k
          0.1107416575309343f,
289
273k
          0.0000000000000000,
290
273k
          -0.1627234014286617f,
291
273k
          0.0000000000000000,
292
273k
          0.0000000000000000,
293
273k
          0.0000000000000000,
294
273k
          0.1488339922711357f,
295
273k
          0.4972464710953509f,
296
273k
          0.2921026642334879f,
297
273k
          0.5550443808910661f,
298
273k
      },
299
273k
      {
300
273k
          0.2500000000000000,
301
273k
          -0.1014005039375377f,
302
273k
          0.1137907446044809f,
303
273k
          -0.1464291867126764f,
304
273k
          0.0000000000000000,
305
273k
          -0.0643507165794628f,
306
273k
          0.0829816309488205f,
307
273k
          -0.2388977352334460f,
308
273k
          -0.3531238544981630f,
309
273k
          -0.4082482904638630f,
310
273k
          0.4826689115059883f,
311
273k
          0.1741941265991622f,
312
273k
          -0.0476868035022925f,
313
273k
          0.1253805944856366f,
314
273k
          -0.4326608024727445f,
315
273k
          -0.2546827712406646f,
316
273k
      },
317
273k
      {
318
273k
          0.2500000000000000,
319
273k
          -0.1014005039375377f,
320
273k
          -0.4444481661973438f,
321
273k
          0.3085497062849487f,
322
273k
          0.0000000000000000,
323
273k
          -0.0643507165794628f,
324
273k
          0.1585450355183970f,
325
273k
          -0.5112616136592012f,
326
273k
          0.2579236279634129f,
327
273k
          0.0000000000000000,
328
273k
          -0.0812611176717504f,
329
273k
          -0.1856718091610990f,
330
273k
          -0.3416446842253373f,
331
273k
          0.3302282550303805f,
332
273k
          0.0702790691196282f,
333
273k
          -0.0741750459581023f,
334
273k
      },
335
273k
      {
336
273k
          0.2500000000000000,
337
273k
          -0.1014005039375376f,
338
273k
          -0.2929100136981264f,
339
273k
          0.0000000000000000,
340
273k
          0.0000000000000000,
341
273k
          -0.0643507165794627f,
342
273k
          0.3935103426921022f,
343
273k
          0.0657870154914254f,
344
273k
          0.0000000000000000,
345
273k
          0.4082482904638634f,
346
273k
          0.3078822139579031f,
347
273k
          0.3852501370925211f,
348
273k
          -0.0857401903551927f,
349
273k
          -0.4613374887461554f,
350
273k
          0.0000000000000000,
351
273k
          0.2191868483885728f,
352
273k
      },
353
273k
      {
354
273k
          0.2500000000000000,
355
273k
          -0.1014005039375376f,
356
273k
          -0.1137907446044814f,
357
273k
          -0.1464291867126654f,
358
273k
          0.0000000000000000,
359
273k
          -0.0643507165794627f,
360
273k
          0.0829816309488214f,
361
273k
          0.2388977352334547f,
362
273k
          -0.3531238544981624f,
363
273k
          0.4082482904638630f,
364
273k
          -0.4826689115059858f,
365
273k
          -0.1741941265991621f,
366
273k
          -0.0476868035022928f,
367
273k
          0.1253805944856431f,
368
273k
          -0.4326608024727457f,
369
273k
          -0.2546827712406641f,
370
273k
      },
371
273k
      {
372
273k
          0.2500000000000000,
373
273k
          -0.1014005039375374f,
374
273k
          0.0000000000000000,
375
273k
          0.4251149611657548f,
376
273k
          0.0000000000000000,
377
273k
          -0.0643507165794626f,
378
273k
          -0.4517556589999480f,
379
273k
          0.0000000000000000,
380
273k
          -0.6035859033230976f,
381
273k
          0.0000000000000000,
382
273k
          0.0000000000000000,
383
273k
          0.0000000000000000,
384
273k
          -0.1426608480880724f,
385
273k
          -0.1381354035075845f,
386
273k
          0.3487520519930227f,
387
273k
          0.1135498731499429f,
388
273k
      },
389
273k
  };
390
391
273k
  const HWY_CAPPED(float, 16) d;
392
820k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
546k
    auto scalar = Zero(d);
394
9.29M
    for (size_t j = 0; j < 16; j++) {
395
8.75M
      auto px = Set(d, pixels[j]);
396
8.75M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
8.75M
      scalar = MulAdd(px, basis, scalar);
398
8.75M
    }
399
546k
    Store(scalar, d, coeffs + i);
400
546k
  }
401
273k
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
273k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
273k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
273k
      {
102
273k
          0.2500000000000000,
103
273k
          0.8769029297991420f,
104
273k
          0.0000000000000000,
105
273k
          0.0000000000000000,
106
273k
          0.0000000000000000,
107
273k
          -0.4105377591765233f,
108
273k
          0.0000000000000000,
109
273k
          0.0000000000000000,
110
273k
          0.0000000000000000,
111
273k
          0.0000000000000000,
112
273k
          0.0000000000000000,
113
273k
          0.0000000000000000,
114
273k
          0.0000000000000000,
115
273k
          0.0000000000000000,
116
273k
          0.0000000000000000,
117
273k
          0.0000000000000000,
118
273k
      },
119
273k
      {
120
273k
          0.2500000000000000,
121
273k
          0.2206518106944235f,
122
273k
          0.0000000000000000,
123
273k
          0.0000000000000000,
124
273k
          -0.7071067811865474f,
125
273k
          0.6235485373547691f,
126
273k
          0.0000000000000000,
127
273k
          0.0000000000000000,
128
273k
          0.0000000000000000,
129
273k
          0.0000000000000000,
130
273k
          0.0000000000000000,
131
273k
          0.0000000000000000,
132
273k
          0.0000000000000000,
133
273k
          0.0000000000000000,
134
273k
          0.0000000000000000,
135
273k
          0.0000000000000000,
136
273k
      },
137
273k
      {
138
273k
          0.2500000000000000,
139
273k
          -0.1014005039375376f,
140
273k
          0.4067007583026075f,
141
273k
          -0.2125574805828875f,
142
273k
          0.0000000000000000,
143
273k
          -0.0643507165794627f,
144
273k
          -0.4517556589999482f,
145
273k
          -0.3046847507248690f,
146
273k
          0.3017929516615495f,
147
273k
          0.4082482904638627f,
148
273k
          0.1747866975480809f,
149
273k
          -0.2110560104933578f,
150
273k
          -0.1426608480880726f,
151
273k
          -0.1381354035075859f,
152
273k
          -0.1743760259965107f,
153
273k
          0.1135498731499434f,
154
273k
      },
155
273k
      {
156
273k
          0.2500000000000000,
157
273k
          -0.1014005039375375f,
158
273k
          0.4444481661973445f,
159
273k
          0.3085497062849767f,
160
273k
          0.0000000000000000f,
161
273k
          -0.0643507165794627f,
162
273k
          0.1585450355184006f,
163
273k
          0.5112616136591823f,
164
273k
          0.2579236279634118f,
165
273k
          0.0000000000000000,
166
273k
          0.0812611176717539f,
167
273k
          0.1856718091610980f,
168
273k
          -0.3416446842253372f,
169
273k
          0.3302282550303788f,
170
273k
          0.0702790691196284f,
171
273k
          -0.0741750459581035f,
172
273k
      },
173
273k
      {
174
273k
          0.2500000000000000,
175
273k
          0.2206518106944236f,
176
273k
          0.0000000000000000,
177
273k
          0.0000000000000000,
178
273k
          0.7071067811865476f,
179
273k
          0.6235485373547694f,
180
273k
          0.0000000000000000,
181
273k
          0.0000000000000000,
182
273k
          0.0000000000000000,
183
273k
          0.0000000000000000,
184
273k
          0.0000000000000000,
185
273k
          0.0000000000000000,
186
273k
          0.0000000000000000,
187
273k
          0.0000000000000000,
188
273k
          0.0000000000000000,
189
273k
          0.0000000000000000,
190
273k
      },
191
273k
      {
192
273k
          0.2500000000000000,
193
273k
          -0.1014005039375378f,
194
273k
          0.0000000000000000,
195
273k
          0.4706702258572536f,
196
273k
          0.0000000000000000,
197
273k
          -0.0643507165794628f,
198
273k
          -0.0403851516082220f,
199
273k
          0.0000000000000000,
200
273k
          0.1627234014286620f,
201
273k
          0.0000000000000000,
202
273k
          0.0000000000000000,
203
273k
          0.0000000000000000,
204
273k
          0.7367497537172237f,
205
273k
          0.0875511500058708f,
206
273k
          -0.2921026642334881f,
207
273k
          0.1940289303259434f,
208
273k
      },
209
273k
      {
210
273k
          0.2500000000000000,
211
273k
          -0.1014005039375377f,
212
273k
          0.1957439937204294f,
213
273k
          -0.1621205195722993f,
214
273k
          0.0000000000000000,
215
273k
          -0.0643507165794628f,
216
273k
          0.0074182263792424f,
217
273k
          -0.2904801297289980f,
218
273k
          0.0952002265347504f,
219
273k
          0.0000000000000000,
220
273k
          -0.3675398009862027f,
221
273k
          0.4921585901373873f,
222
273k
          0.2462710772207515f,
223
273k
          -0.0794670660590957f,
224
273k
          0.3623817333531167f,
225
273k
          -0.4351904965232280f,
226
273k
      },
227
273k
      {
228
273k
          0.2500000000000000,
229
273k
          -0.1014005039375376f,
230
273k
          0.2929100136981264f,
231
273k
          0.0000000000000000,
232
273k
          0.0000000000000000,
233
273k
          -0.0643507165794627f,
234
273k
          0.3935103426921017f,
235
273k
          -0.0657870154914280f,
236
273k
          0.0000000000000000,
237
273k
          -0.4082482904638628f,
238
273k
          -0.3078822139579090f,
239
273k
          -0.3852501370925192f,
240
273k
          -0.0857401903551931f,
241
273k
          -0.4613374887461511f,
242
273k
          0.0000000000000000,
243
273k
          0.2191868483885747f,
244
273k
      },
245
273k
      {
246
273k
          0.2500000000000000,
247
273k
          -0.1014005039375376f,
248
273k
          -0.4067007583026072f,
249
273k
          -0.2125574805828705f,
250
273k
          0.0000000000000000,
251
273k
          -0.0643507165794627f,
252
273k
          -0.4517556589999464f,
253
273k
          0.3046847507248840f,
254
273k
          0.3017929516615503f,
255
273k
          -0.4082482904638635f,
256
273k
          -0.1747866975480813f,
257
273k
          0.2110560104933581f,
258
273k
          -0.1426608480880734f,
259
273k
          -0.1381354035075829f,
260
273k
          -0.1743760259965108f,
261
273k
          0.1135498731499426f,
262
273k
      },
263
273k
      {
264
273k
          0.2500000000000000,
265
273k
          -0.1014005039375377f,
266
273k
          -0.1957439937204287f,
267
273k
          -0.1621205195722833f,
268
273k
          0.0000000000000000,
269
273k
          -0.0643507165794628f,
270
273k
          0.0074182263792444f,
271
273k
          0.2904801297290076f,
272
273k
          0.0952002265347505f,
273
273k
          0.0000000000000000,
274
273k
          0.3675398009862011f,
275
273k
          -0.4921585901373891f,
276
273k
          0.2462710772207514f,
277
273k
          -0.0794670660591026f,
278
273k
          0.3623817333531165f,
279
273k
          -0.4351904965232251f,
280
273k
      },
281
273k
      {
282
273k
          0.2500000000000000,
283
273k
          -0.1014005039375375f,
284
273k
          0.0000000000000000,
285
273k
          -0.4706702258572528f,
286
273k
          0.0000000000000000,
287
273k
          -0.0643507165794627f,
288
273k
          0.1107416575309343f,
289
273k
          0.0000000000000000,
290
273k
          -0.1627234014286617f,
291
273k
          0.0000000000000000,
292
273k
          0.0000000000000000,
293
273k
          0.0000000000000000,
294
273k
          0.1488339922711357f,
295
273k
          0.4972464710953509f,
296
273k
          0.2921026642334879f,
297
273k
          0.5550443808910661f,
298
273k
      },
299
273k
      {
300
273k
          0.2500000000000000,
301
273k
          -0.1014005039375377f,
302
273k
          0.1137907446044809f,
303
273k
          -0.1464291867126764f,
304
273k
          0.0000000000000000,
305
273k
          -0.0643507165794628f,
306
273k
          0.0829816309488205f,
307
273k
          -0.2388977352334460f,
308
273k
          -0.3531238544981630f,
309
273k
          -0.4082482904638630f,
310
273k
          0.4826689115059883f,
311
273k
          0.1741941265991622f,
312
273k
          -0.0476868035022925f,
313
273k
          0.1253805944856366f,
314
273k
          -0.4326608024727445f,
315
273k
          -0.2546827712406646f,
316
273k
      },
317
273k
      {
318
273k
          0.2500000000000000,
319
273k
          -0.1014005039375377f,
320
273k
          -0.4444481661973438f,
321
273k
          0.3085497062849487f,
322
273k
          0.0000000000000000,
323
273k
          -0.0643507165794628f,
324
273k
          0.1585450355183970f,
325
273k
          -0.5112616136592012f,
326
273k
          0.2579236279634129f,
327
273k
          0.0000000000000000,
328
273k
          -0.0812611176717504f,
329
273k
          -0.1856718091610990f,
330
273k
          -0.3416446842253373f,
331
273k
          0.3302282550303805f,
332
273k
          0.0702790691196282f,
333
273k
          -0.0741750459581023f,
334
273k
      },
335
273k
      {
336
273k
          0.2500000000000000,
337
273k
          -0.1014005039375376f,
338
273k
          -0.2929100136981264f,
339
273k
          0.0000000000000000,
340
273k
          0.0000000000000000,
341
273k
          -0.0643507165794627f,
342
273k
          0.3935103426921022f,
343
273k
          0.0657870154914254f,
344
273k
          0.0000000000000000,
345
273k
          0.4082482904638634f,
346
273k
          0.3078822139579031f,
347
273k
          0.3852501370925211f,
348
273k
          -0.0857401903551927f,
349
273k
          -0.4613374887461554f,
350
273k
          0.0000000000000000,
351
273k
          0.2191868483885728f,
352
273k
      },
353
273k
      {
354
273k
          0.2500000000000000,
355
273k
          -0.1014005039375376f,
356
273k
          -0.1137907446044814f,
357
273k
          -0.1464291867126654f,
358
273k
          0.0000000000000000,
359
273k
          -0.0643507165794627f,
360
273k
          0.0829816309488214f,
361
273k
          0.2388977352334547f,
362
273k
          -0.3531238544981624f,
363
273k
          0.4082482904638630f,
364
273k
          -0.4826689115059858f,
365
273k
          -0.1741941265991621f,
366
273k
          -0.0476868035022928f,
367
273k
          0.1253805944856431f,
368
273k
          -0.4326608024727457f,
369
273k
          -0.2546827712406641f,
370
273k
      },
371
273k
      {
372
273k
          0.2500000000000000,
373
273k
          -0.1014005039375374f,
374
273k
          0.0000000000000000,
375
273k
          0.4251149611657548f,
376
273k
          0.0000000000000000,
377
273k
          -0.0643507165794626f,
378
273k
          -0.4517556589999480f,
379
273k
          0.0000000000000000,
380
273k
          -0.6035859033230976f,
381
273k
          0.0000000000000000,
382
273k
          0.0000000000000000,
383
273k
          0.0000000000000000,
384
273k
          -0.1426608480880724f,
385
273k
          -0.1381354035075845f,
386
273k
          0.3487520519930227f,
387
273k
          0.1135498731499429f,
388
273k
      },
389
273k
  };
390
391
273k
  const HWY_CAPPED(float, 16) d;
392
820k
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
546k
    auto scalar = Zero(d);
394
9.29M
    for (size_t j = 0; j < 16; j++) {
395
8.75M
      auto px = Set(d, pixels[j]);
396
8.75M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
8.75M
      scalar = MulAdd(px, basis, scalar);
398
8.75M
    }
399
546k
    Store(scalar, d, coeffs + i);
400
546k
  }
401
273k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
43.3M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
43.3M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
43.3M
      {
102
43.3M
          0.2500000000000000,
103
43.3M
          0.8769029297991420f,
104
43.3M
          0.0000000000000000,
105
43.3M
          0.0000000000000000,
106
43.3M
          0.0000000000000000,
107
43.3M
          -0.4105377591765233f,
108
43.3M
          0.0000000000000000,
109
43.3M
          0.0000000000000000,
110
43.3M
          0.0000000000000000,
111
43.3M
          0.0000000000000000,
112
43.3M
          0.0000000000000000,
113
43.3M
          0.0000000000000000,
114
43.3M
          0.0000000000000000,
115
43.3M
          0.0000000000000000,
116
43.3M
          0.0000000000000000,
117
43.3M
          0.0000000000000000,
118
43.3M
      },
119
43.3M
      {
120
43.3M
          0.2500000000000000,
121
43.3M
          0.2206518106944235f,
122
43.3M
          0.0000000000000000,
123
43.3M
          0.0000000000000000,
124
43.3M
          -0.7071067811865474f,
125
43.3M
          0.6235485373547691f,
126
43.3M
          0.0000000000000000,
127
43.3M
          0.0000000000000000,
128
43.3M
          0.0000000000000000,
129
43.3M
          0.0000000000000000,
130
43.3M
          0.0000000000000000,
131
43.3M
          0.0000000000000000,
132
43.3M
          0.0000000000000000,
133
43.3M
          0.0000000000000000,
134
43.3M
          0.0000000000000000,
135
43.3M
          0.0000000000000000,
136
43.3M
      },
137
43.3M
      {
138
43.3M
          0.2500000000000000,
139
43.3M
          -0.1014005039375376f,
140
43.3M
          0.4067007583026075f,
141
43.3M
          -0.2125574805828875f,
142
43.3M
          0.0000000000000000,
143
43.3M
          -0.0643507165794627f,
144
43.3M
          -0.4517556589999482f,
145
43.3M
          -0.3046847507248690f,
146
43.3M
          0.3017929516615495f,
147
43.3M
          0.4082482904638627f,
148
43.3M
          0.1747866975480809f,
149
43.3M
          -0.2110560104933578f,
150
43.3M
          -0.1426608480880726f,
151
43.3M
          -0.1381354035075859f,
152
43.3M
          -0.1743760259965107f,
153
43.3M
          0.1135498731499434f,
154
43.3M
      },
155
43.3M
      {
156
43.3M
          0.2500000000000000,
157
43.3M
          -0.1014005039375375f,
158
43.3M
          0.4444481661973445f,
159
43.3M
          0.3085497062849767f,
160
43.3M
          0.0000000000000000f,
161
43.3M
          -0.0643507165794627f,
162
43.3M
          0.1585450355184006f,
163
43.3M
          0.5112616136591823f,
164
43.3M
          0.2579236279634118f,
165
43.3M
          0.0000000000000000,
166
43.3M
          0.0812611176717539f,
167
43.3M
          0.1856718091610980f,
168
43.3M
          -0.3416446842253372f,
169
43.3M
          0.3302282550303788f,
170
43.3M
          0.0702790691196284f,
171
43.3M
          -0.0741750459581035f,
172
43.3M
      },
173
43.3M
      {
174
43.3M
          0.2500000000000000,
175
43.3M
          0.2206518106944236f,
176
43.3M
          0.0000000000000000,
177
43.3M
          0.0000000000000000,
178
43.3M
          0.7071067811865476f,
179
43.3M
          0.6235485373547694f,
180
43.3M
          0.0000000000000000,
181
43.3M
          0.0000000000000000,
182
43.3M
          0.0000000000000000,
183
43.3M
          0.0000000000000000,
184
43.3M
          0.0000000000000000,
185
43.3M
          0.0000000000000000,
186
43.3M
          0.0000000000000000,
187
43.3M
          0.0000000000000000,
188
43.3M
          0.0000000000000000,
189
43.3M
          0.0000000000000000,
190
43.3M
      },
191
43.3M
      {
192
43.3M
          0.2500000000000000,
193
43.3M
          -0.1014005039375378f,
194
43.3M
          0.0000000000000000,
195
43.3M
          0.4706702258572536f,
196
43.3M
          0.0000000000000000,
197
43.3M
          -0.0643507165794628f,
198
43.3M
          -0.0403851516082220f,
199
43.3M
          0.0000000000000000,
200
43.3M
          0.1627234014286620f,
201
43.3M
          0.0000000000000000,
202
43.3M
          0.0000000000000000,
203
43.3M
          0.0000000000000000,
204
43.3M
          0.7367497537172237f,
205
43.3M
          0.0875511500058708f,
206
43.3M
          -0.2921026642334881f,
207
43.3M
          0.1940289303259434f,
208
43.3M
      },
209
43.3M
      {
210
43.3M
          0.2500000000000000,
211
43.3M
          -0.1014005039375377f,
212
43.3M
          0.1957439937204294f,
213
43.3M
          -0.1621205195722993f,
214
43.3M
          0.0000000000000000,
215
43.3M
          -0.0643507165794628f,
216
43.3M
          0.0074182263792424f,
217
43.3M
          -0.2904801297289980f,
218
43.3M
          0.0952002265347504f,
219
43.3M
          0.0000000000000000,
220
43.3M
          -0.3675398009862027f,
221
43.3M
          0.4921585901373873f,
222
43.3M
          0.2462710772207515f,
223
43.3M
          -0.0794670660590957f,
224
43.3M
          0.3623817333531167f,
225
43.3M
          -0.4351904965232280f,
226
43.3M
      },
227
43.3M
      {
228
43.3M
          0.2500000000000000,
229
43.3M
          -0.1014005039375376f,
230
43.3M
          0.2929100136981264f,
231
43.3M
          0.0000000000000000,
232
43.3M
          0.0000000000000000,
233
43.3M
          -0.0643507165794627f,
234
43.3M
          0.3935103426921017f,
235
43.3M
          -0.0657870154914280f,
236
43.3M
          0.0000000000000000,
237
43.3M
          -0.4082482904638628f,
238
43.3M
          -0.3078822139579090f,
239
43.3M
          -0.3852501370925192f,
240
43.3M
          -0.0857401903551931f,
241
43.3M
          -0.4613374887461511f,
242
43.3M
          0.0000000000000000,
243
43.3M
          0.2191868483885747f,
244
43.3M
      },
245
43.3M
      {
246
43.3M
          0.2500000000000000,
247
43.3M
          -0.1014005039375376f,
248
43.3M
          -0.4067007583026072f,
249
43.3M
          -0.2125574805828705f,
250
43.3M
          0.0000000000000000,
251
43.3M
          -0.0643507165794627f,
252
43.3M
          -0.4517556589999464f,
253
43.3M
          0.3046847507248840f,
254
43.3M
          0.3017929516615503f,
255
43.3M
          -0.4082482904638635f,
256
43.3M
          -0.1747866975480813f,
257
43.3M
          0.2110560104933581f,
258
43.3M
          -0.1426608480880734f,
259
43.3M
          -0.1381354035075829f,
260
43.3M
          -0.1743760259965108f,
261
43.3M
          0.1135498731499426f,
262
43.3M
      },
263
43.3M
      {
264
43.3M
          0.2500000000000000,
265
43.3M
          -0.1014005039375377f,
266
43.3M
          -0.1957439937204287f,
267
43.3M
          -0.1621205195722833f,
268
43.3M
          0.0000000000000000,
269
43.3M
          -0.0643507165794628f,
270
43.3M
          0.0074182263792444f,
271
43.3M
          0.2904801297290076f,
272
43.3M
          0.0952002265347505f,
273
43.3M
          0.0000000000000000,
274
43.3M
          0.3675398009862011f,
275
43.3M
          -0.4921585901373891f,
276
43.3M
          0.2462710772207514f,
277
43.3M
          -0.0794670660591026f,
278
43.3M
          0.3623817333531165f,
279
43.3M
          -0.4351904965232251f,
280
43.3M
      },
281
43.3M
      {
282
43.3M
          0.2500000000000000,
283
43.3M
          -0.1014005039375375f,
284
43.3M
          0.0000000000000000,
285
43.3M
          -0.4706702258572528f,
286
43.3M
          0.0000000000000000,
287
43.3M
          -0.0643507165794627f,
288
43.3M
          0.1107416575309343f,
289
43.3M
          0.0000000000000000,
290
43.3M
          -0.1627234014286617f,
291
43.3M
          0.0000000000000000,
292
43.3M
          0.0000000000000000,
293
43.3M
          0.0000000000000000,
294
43.3M
          0.1488339922711357f,
295
43.3M
          0.4972464710953509f,
296
43.3M
          0.2921026642334879f,
297
43.3M
          0.5550443808910661f,
298
43.3M
      },
299
43.3M
      {
300
43.3M
          0.2500000000000000,
301
43.3M
          -0.1014005039375377f,
302
43.3M
          0.1137907446044809f,
303
43.3M
          -0.1464291867126764f,
304
43.3M
          0.0000000000000000,
305
43.3M
          -0.0643507165794628f,
306
43.3M
          0.0829816309488205f,
307
43.3M
          -0.2388977352334460f,
308
43.3M
          -0.3531238544981630f,
309
43.3M
          -0.4082482904638630f,
310
43.3M
          0.4826689115059883f,
311
43.3M
          0.1741941265991622f,
312
43.3M
          -0.0476868035022925f,
313
43.3M
          0.1253805944856366f,
314
43.3M
          -0.4326608024727445f,
315
43.3M
          -0.2546827712406646f,
316
43.3M
      },
317
43.3M
      {
318
43.3M
          0.2500000000000000,
319
43.3M
          -0.1014005039375377f,
320
43.3M
          -0.4444481661973438f,
321
43.3M
          0.3085497062849487f,
322
43.3M
          0.0000000000000000,
323
43.3M
          -0.0643507165794628f,
324
43.3M
          0.1585450355183970f,
325
43.3M
          -0.5112616136592012f,
326
43.3M
          0.2579236279634129f,
327
43.3M
          0.0000000000000000,
328
43.3M
          -0.0812611176717504f,
329
43.3M
          -0.1856718091610990f,
330
43.3M
          -0.3416446842253373f,
331
43.3M
          0.3302282550303805f,
332
43.3M
          0.0702790691196282f,
333
43.3M
          -0.0741750459581023f,
334
43.3M
      },
335
43.3M
      {
336
43.3M
          0.2500000000000000,
337
43.3M
          -0.1014005039375376f,
338
43.3M
          -0.2929100136981264f,
339
43.3M
          0.0000000000000000,
340
43.3M
          0.0000000000000000,
341
43.3M
          -0.0643507165794627f,
342
43.3M
          0.3935103426921022f,
343
43.3M
          0.0657870154914254f,
344
43.3M
          0.0000000000000000,
345
43.3M
          0.4082482904638634f,
346
43.3M
          0.3078822139579031f,
347
43.3M
          0.3852501370925211f,
348
43.3M
          -0.0857401903551927f,
349
43.3M
          -0.4613374887461554f,
350
43.3M
          0.0000000000000000,
351
43.3M
          0.2191868483885728f,
352
43.3M
      },
353
43.3M
      {
354
43.3M
          0.2500000000000000,
355
43.3M
          -0.1014005039375376f,
356
43.3M
          -0.1137907446044814f,
357
43.3M
          -0.1464291867126654f,
358
43.3M
          0.0000000000000000,
359
43.3M
          -0.0643507165794627f,
360
43.3M
          0.0829816309488214f,
361
43.3M
          0.2388977352334547f,
362
43.3M
          -0.3531238544981624f,
363
43.3M
          0.4082482904638630f,
364
43.3M
          -0.4826689115059858f,
365
43.3M
          -0.1741941265991621f,
366
43.3M
          -0.0476868035022928f,
367
43.3M
          0.1253805944856431f,
368
43.3M
          -0.4326608024727457f,
369
43.3M
          -0.2546827712406641f,
370
43.3M
      },
371
43.3M
      {
372
43.3M
          0.2500000000000000,
373
43.3M
          -0.1014005039375374f,
374
43.3M
          0.0000000000000000,
375
43.3M
          0.4251149611657548f,
376
43.3M
          0.0000000000000000,
377
43.3M
          -0.0643507165794626f,
378
43.3M
          -0.4517556589999480f,
379
43.3M
          0.0000000000000000,
380
43.3M
          -0.6035859033230976f,
381
43.3M
          0.0000000000000000,
382
43.3M
          0.0000000000000000,
383
43.3M
          0.0000000000000000,
384
43.3M
          -0.1426608480880724f,
385
43.3M
          -0.1381354035075845f,
386
43.3M
          0.3487520519930227f,
387
43.3M
          0.1135498731499429f,
388
43.3M
      },
389
43.3M
  };
390
391
43.3M
  const HWY_CAPPED(float, 16) d;
392
129M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
86.6M
    auto scalar = Zero(d);
394
1.47G
    for (size_t j = 0; j < 16; j++) {
395
1.38G
      auto px = Set(d, pixels[j]);
396
1.38G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.38G
      scalar = MulAdd(px, basis, scalar);
398
1.38G
    }
399
86.6M
    Store(scalar, d, coeffs + i);
400
86.6M
  }
401
43.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
43.8M
                            float* JXL_RESTRICT coefficients) {
411
43.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
43.8M
  size_t afv_x = afv_kind & 1;
413
43.8M
  size_t afv_y = afv_kind / 2;
414
43.8M
  HWY_ALIGN float block[4 * 8] = {};
415
219M
  for (size_t iy = 0; iy < 4; iy++) {
416
877M
    for (size_t ix = 0; ix < 4; ix++) {
417
701M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
701M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
701M
    }
420
175M
  }
421
  // AFV coefficients in (even, even) positions.
422
43.8M
  HWY_ALIGN float coeff[4 * 4];
423
43.8M
  AFVDCT4x4(block, coeff);
424
219M
  for (size_t iy = 0; iy < 4; iy++) {
425
877M
    for (size_t ix = 0; ix < 4; ix++) {
426
701M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
701M
    }
428
175M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
43.8M
  ComputeScaledDCT<4, 4>()(
431
43.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
43.8M
              pixels_stride),
433
43.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
219M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.57G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.40G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.40G
    }
439
175M
  }
440
  // 4x8 DCT of the other half of the block.
441
43.8M
  ComputeScaledDCT<4, 8>()(
442
43.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
43.8M
      block, scratch_space);
444
219M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.57G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.40G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.40G
    }
448
175M
  }
449
43.8M
  float block00 = coefficients[0] * 0.25f;
450
43.8M
  float block01 = coefficients[1];
451
43.8M
  float block10 = coefficients[8];
452
43.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
43.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
43.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
43.8M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
92.5k
                            float* JXL_RESTRICT coefficients) {
411
92.5k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
92.5k
  size_t afv_x = afv_kind & 1;
413
92.5k
  size_t afv_y = afv_kind / 2;
414
92.5k
  HWY_ALIGN float block[4 * 8] = {};
415
462k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.85M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.48M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.48M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.48M
    }
420
370k
  }
421
  // AFV coefficients in (even, even) positions.
422
92.5k
  HWY_ALIGN float coeff[4 * 4];
423
92.5k
  AFVDCT4x4(block, coeff);
424
462k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.85M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.48M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.48M
    }
428
370k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
92.5k
  ComputeScaledDCT<4, 4>()(
431
92.5k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
92.5k
              pixels_stride),
433
92.5k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
462k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.33M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.96M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.96M
    }
439
370k
  }
440
  // 4x8 DCT of the other half of the block.
441
92.5k
  ComputeScaledDCT<4, 8>()(
442
92.5k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
92.5k
      block, scratch_space);
444
462k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.33M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.96M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.96M
    }
448
370k
  }
449
92.5k
  float block00 = coefficients[0] * 0.25f;
450
92.5k
  float block01 = coefficients[1];
451
92.5k
  float block10 = coefficients[8];
452
92.5k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
92.5k
  coefficients[1] = (block00 - block01) * 0.5f;
454
92.5k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
92.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
52.6k
                            float* JXL_RESTRICT coefficients) {
411
52.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
52.6k
  size_t afv_x = afv_kind & 1;
413
52.6k
  size_t afv_y = afv_kind / 2;
414
52.6k
  HWY_ALIGN float block[4 * 8] = {};
415
263k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.05M
    for (size_t ix = 0; ix < 4; ix++) {
417
842k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
842k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
842k
    }
420
210k
  }
421
  // AFV coefficients in (even, even) positions.
422
52.6k
  HWY_ALIGN float coeff[4 * 4];
423
52.6k
  AFVDCT4x4(block, coeff);
424
263k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.05M
    for (size_t ix = 0; ix < 4; ix++) {
426
842k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
842k
    }
428
210k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
52.6k
  ComputeScaledDCT<4, 4>()(
431
52.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
52.6k
              pixels_stride),
433
52.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
263k
  for (size_t iy = 0; iy < 4; iy++) {
436
1.89M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.68M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.68M
    }
439
210k
  }
440
  // 4x8 DCT of the other half of the block.
441
52.6k
  ComputeScaledDCT<4, 8>()(
442
52.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
52.6k
      block, scratch_space);
444
263k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.89M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.68M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.68M
    }
448
210k
  }
449
52.6k
  float block00 = coefficients[0] * 0.25f;
450
52.6k
  float block01 = coefficients[1];
451
52.6k
  float block10 = coefficients[8];
452
52.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
52.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
52.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
52.6k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
65.1k
                            float* JXL_RESTRICT coefficients) {
411
65.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
65.1k
  size_t afv_x = afv_kind & 1;
413
65.1k
  size_t afv_y = afv_kind / 2;
414
65.1k
  HWY_ALIGN float block[4 * 8] = {};
415
325k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.04M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.04M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.04M
    }
420
260k
  }
421
  // AFV coefficients in (even, even) positions.
422
65.1k
  HWY_ALIGN float coeff[4 * 4];
423
65.1k
  AFVDCT4x4(block, coeff);
424
325k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.04M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.04M
    }
428
260k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
65.1k
  ComputeScaledDCT<4, 4>()(
431
65.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
65.1k
              pixels_stride),
433
65.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
325k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.08M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.08M
    }
439
260k
  }
440
  // 4x8 DCT of the other half of the block.
441
65.1k
  ComputeScaledDCT<4, 8>()(
442
65.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
65.1k
      block, scratch_space);
444
325k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.08M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.08M
    }
448
260k
  }
449
65.1k
  float block00 = coefficients[0] * 0.25f;
450
65.1k
  float block01 = coefficients[1];
451
65.1k
  float block10 = coefficients[8];
452
65.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
65.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
65.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
65.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
63.0k
                            float* JXL_RESTRICT coefficients) {
411
63.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
63.0k
  size_t afv_x = afv_kind & 1;
413
63.0k
  size_t afv_y = afv_kind / 2;
414
63.0k
  HWY_ALIGN float block[4 * 8] = {};
415
315k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.26M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.00M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.00M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.00M
    }
420
252k
  }
421
  // AFV coefficients in (even, even) positions.
422
63.0k
  HWY_ALIGN float coeff[4 * 4];
423
63.0k
  AFVDCT4x4(block, coeff);
424
315k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.26M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.00M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.00M
    }
428
252k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
63.0k
  ComputeScaledDCT<4, 4>()(
431
63.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
63.0k
              pixels_stride),
433
63.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
315k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.27M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.01M
    }
439
252k
  }
440
  // 4x8 DCT of the other half of the block.
441
63.0k
  ComputeScaledDCT<4, 8>()(
442
63.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
63.0k
      block, scratch_space);
444
315k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.27M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.01M
    }
448
252k
  }
449
63.0k
  float block00 = coefficients[0] * 0.25f;
450
63.0k
  float block01 = coefficients[1];
451
63.0k
  float block10 = coefficients[8];
452
63.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
63.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
63.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
63.0k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
92.5k
                            float* JXL_RESTRICT coefficients) {
411
92.5k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
92.5k
  size_t afv_x = afv_kind & 1;
413
92.5k
  size_t afv_y = afv_kind / 2;
414
92.5k
  HWY_ALIGN float block[4 * 8] = {};
415
462k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.85M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.48M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.48M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.48M
    }
420
370k
  }
421
  // AFV coefficients in (even, even) positions.
422
92.5k
  HWY_ALIGN float coeff[4 * 4];
423
92.5k
  AFVDCT4x4(block, coeff);
424
462k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.85M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.48M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.48M
    }
428
370k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
92.5k
  ComputeScaledDCT<4, 4>()(
431
92.5k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
92.5k
              pixels_stride),
433
92.5k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
462k
  for (size_t iy = 0; iy < 4; iy++) {
436
3.33M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.96M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.96M
    }
439
370k
  }
440
  // 4x8 DCT of the other half of the block.
441
92.5k
  ComputeScaledDCT<4, 8>()(
442
92.5k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
92.5k
      block, scratch_space);
444
462k
  for (size_t iy = 0; iy < 4; iy++) {
445
3.33M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.96M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.96M
    }
448
370k
  }
449
92.5k
  float block00 = coefficients[0] * 0.25f;
450
92.5k
  float block01 = coefficients[1];
451
92.5k
  float block10 = coefficients[8];
452
92.5k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
92.5k
  coefficients[1] = (block00 - block01) * 0.5f;
454
92.5k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
92.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
52.6k
                            float* JXL_RESTRICT coefficients) {
411
52.6k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
52.6k
  size_t afv_x = afv_kind & 1;
413
52.6k
  size_t afv_y = afv_kind / 2;
414
52.6k
  HWY_ALIGN float block[4 * 8] = {};
415
263k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.05M
    for (size_t ix = 0; ix < 4; ix++) {
417
842k
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
842k
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
842k
    }
420
210k
  }
421
  // AFV coefficients in (even, even) positions.
422
52.6k
  HWY_ALIGN float coeff[4 * 4];
423
52.6k
  AFVDCT4x4(block, coeff);
424
263k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.05M
    for (size_t ix = 0; ix < 4; ix++) {
426
842k
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
842k
    }
428
210k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
52.6k
  ComputeScaledDCT<4, 4>()(
431
52.6k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
52.6k
              pixels_stride),
433
52.6k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
263k
  for (size_t iy = 0; iy < 4; iy++) {
436
1.89M
    for (size_t ix = 0; ix < 8; ix++) {
437
1.68M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.68M
    }
439
210k
  }
440
  // 4x8 DCT of the other half of the block.
441
52.6k
  ComputeScaledDCT<4, 8>()(
442
52.6k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
52.6k
      block, scratch_space);
444
263k
  for (size_t iy = 0; iy < 4; iy++) {
445
1.89M
    for (size_t ix = 0; ix < 8; ix++) {
446
1.68M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.68M
    }
448
210k
  }
449
52.6k
  float block00 = coefficients[0] * 0.25f;
450
52.6k
  float block01 = coefficients[1];
451
52.6k
  float block10 = coefficients[8];
452
52.6k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
52.6k
  coefficients[1] = (block00 - block01) * 0.5f;
454
52.6k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
52.6k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
65.1k
                            float* JXL_RESTRICT coefficients) {
411
65.1k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
65.1k
  size_t afv_x = afv_kind & 1;
413
65.1k
  size_t afv_y = afv_kind / 2;
414
65.1k
  HWY_ALIGN float block[4 * 8] = {};
415
325k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.04M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.04M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.04M
    }
420
260k
  }
421
  // AFV coefficients in (even, even) positions.
422
65.1k
  HWY_ALIGN float coeff[4 * 4];
423
65.1k
  AFVDCT4x4(block, coeff);
424
325k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.30M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.04M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.04M
    }
428
260k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
65.1k
  ComputeScaledDCT<4, 4>()(
431
65.1k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
65.1k
              pixels_stride),
433
65.1k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
325k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.08M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.08M
    }
439
260k
  }
440
  // 4x8 DCT of the other half of the block.
441
65.1k
  ComputeScaledDCT<4, 8>()(
442
65.1k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
65.1k
      block, scratch_space);
444
325k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.34M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.08M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.08M
    }
448
260k
  }
449
65.1k
  float block00 = coefficients[0] * 0.25f;
450
65.1k
  float block01 = coefficients[1];
451
65.1k
  float block10 = coefficients[8];
452
65.1k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
65.1k
  coefficients[1] = (block00 - block01) * 0.5f;
454
65.1k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
65.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
63.0k
                            float* JXL_RESTRICT coefficients) {
411
63.0k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
63.0k
  size_t afv_x = afv_kind & 1;
413
63.0k
  size_t afv_y = afv_kind / 2;
414
63.0k
  HWY_ALIGN float block[4 * 8] = {};
415
315k
  for (size_t iy = 0; iy < 4; iy++) {
416
1.26M
    for (size_t ix = 0; ix < 4; ix++) {
417
1.00M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.00M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.00M
    }
420
252k
  }
421
  // AFV coefficients in (even, even) positions.
422
63.0k
  HWY_ALIGN float coeff[4 * 4];
423
63.0k
  AFVDCT4x4(block, coeff);
424
315k
  for (size_t iy = 0; iy < 4; iy++) {
425
1.26M
    for (size_t ix = 0; ix < 4; ix++) {
426
1.00M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.00M
    }
428
252k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
63.0k
  ComputeScaledDCT<4, 4>()(
431
63.0k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
63.0k
              pixels_stride),
433
63.0k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
315k
  for (size_t iy = 0; iy < 4; iy++) {
436
2.27M
    for (size_t ix = 0; ix < 8; ix++) {
437
2.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.01M
    }
439
252k
  }
440
  // 4x8 DCT of the other half of the block.
441
63.0k
  ComputeScaledDCT<4, 8>()(
442
63.0k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
63.0k
      block, scratch_space);
444
315k
  for (size_t iy = 0; iy < 4; iy++) {
445
2.27M
    for (size_t ix = 0; ix < 8; ix++) {
446
2.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.01M
    }
448
252k
  }
449
63.0k
  float block00 = coefficients[0] * 0.25f;
450
63.0k
  float block01 = coefficients[1];
451
63.0k
  float block10 = coefficients[8];
452
63.0k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
63.0k
  coefficients[1] = (block00 - block01) * 0.5f;
454
63.0k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
63.0k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.8M
                            float* JXL_RESTRICT coefficients) {
411
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.8M
  size_t afv_x = afv_kind & 1;
413
10.8M
  size_t afv_y = afv_kind / 2;
414
10.8M
  HWY_ALIGN float block[4 * 8] = {};
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
173M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
173M
    }
420
43.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
10.8M
  HWY_ALIGN float coeff[4 * 4];
423
10.8M
  AFVDCT4x4(block, coeff);
424
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
216M
    for (size_t ix = 0; ix < 4; ix++) {
426
173M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
173M
    }
428
43.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.8M
  ComputeScaledDCT<4, 4>()(
431
10.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.8M
              pixels_stride),
433
10.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
389M
    for (size_t ix = 0; ix < 8; ix++) {
437
346M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
346M
    }
439
43.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
10.8M
  ComputeScaledDCT<4, 8>()(
442
10.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.8M
      block, scratch_space);
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
346M
    }
448
43.3M
  }
449
10.8M
  float block00 = coefficients[0] * 0.25f;
450
10.8M
  float block01 = coefficients[1];
451
10.8M
  float block10 = coefficients[8];
452
10.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.8M
                            float* JXL_RESTRICT coefficients) {
411
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.8M
  size_t afv_x = afv_kind & 1;
413
10.8M
  size_t afv_y = afv_kind / 2;
414
10.8M
  HWY_ALIGN float block[4 * 8] = {};
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
173M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
173M
    }
420
43.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
10.8M
  HWY_ALIGN float coeff[4 * 4];
423
10.8M
  AFVDCT4x4(block, coeff);
424
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
216M
    for (size_t ix = 0; ix < 4; ix++) {
426
173M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
173M
    }
428
43.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.8M
  ComputeScaledDCT<4, 4>()(
431
10.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.8M
              pixels_stride),
433
10.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
389M
    for (size_t ix = 0; ix < 8; ix++) {
437
346M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
346M
    }
439
43.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
10.8M
  ComputeScaledDCT<4, 8>()(
442
10.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.8M
      block, scratch_space);
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
346M
    }
448
43.3M
  }
449
10.8M
  float block00 = coefficients[0] * 0.25f;
450
10.8M
  float block01 = coefficients[1];
451
10.8M
  float block10 = coefficients[8];
452
10.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.8M
                            float* JXL_RESTRICT coefficients) {
411
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.8M
  size_t afv_x = afv_kind & 1;
413
10.8M
  size_t afv_y = afv_kind / 2;
414
10.8M
  HWY_ALIGN float block[4 * 8] = {};
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
173M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
173M
    }
420
43.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
10.8M
  HWY_ALIGN float coeff[4 * 4];
423
10.8M
  AFVDCT4x4(block, coeff);
424
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
216M
    for (size_t ix = 0; ix < 4; ix++) {
426
173M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
173M
    }
428
43.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.8M
  ComputeScaledDCT<4, 4>()(
431
10.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.8M
              pixels_stride),
433
10.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
389M
    for (size_t ix = 0; ix < 8; ix++) {
437
346M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
346M
    }
439
43.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
10.8M
  ComputeScaledDCT<4, 8>()(
442
10.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.8M
      block, scratch_space);
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
346M
    }
448
43.3M
  }
449
10.8M
  float block00 = coefficients[0] * 0.25f;
450
10.8M
  float block01 = coefficients[1];
451
10.8M
  float block10 = coefficients[8];
452
10.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
10.8M
                            float* JXL_RESTRICT coefficients) {
411
10.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
10.8M
  size_t afv_x = afv_kind & 1;
413
10.8M
  size_t afv_y = afv_kind / 2;
414
10.8M
  HWY_ALIGN float block[4 * 8] = {};
415
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
216M
    for (size_t ix = 0; ix < 4; ix++) {
417
173M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
173M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
173M
    }
420
43.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
10.8M
  HWY_ALIGN float coeff[4 * 4];
423
10.8M
  AFVDCT4x4(block, coeff);
424
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
216M
    for (size_t ix = 0; ix < 4; ix++) {
426
173M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
173M
    }
428
43.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
10.8M
  ComputeScaledDCT<4, 4>()(
431
10.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
10.8M
              pixels_stride),
433
10.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
389M
    for (size_t ix = 0; ix < 8; ix++) {
437
346M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
346M
    }
439
43.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
10.8M
  ComputeScaledDCT<4, 8>()(
442
10.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
10.8M
      block, scratch_space);
444
54.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
389M
    for (size_t ix = 0; ix < 8; ix++) {
446
346M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
346M
    }
448
43.3M
  }
449
10.8M
  float block00 = coefficients[0] * 0.25f;
450
10.8M
  float block01 = coefficients[1];
451
10.8M
  float block10 = coefficients[8];
452
10.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
10.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
10.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
10.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
157M
                                          float* JXL_RESTRICT scratch_space) {
462
157M
  using Type = AcStrategyType;
463
157M
  switch (strategy) {
464
12.2M
    case Type::IDENTITY: {
465
36.6M
      for (size_t y = 0; y < 2; y++) {
466
73.2M
        for (size_t x = 0; x < 2; x++) {
467
48.8M
          float block_dc = 0;
468
244M
          for (size_t iy = 0; iy < 4; iy++) {
469
976M
            for (size_t ix = 0; ix < 4; ix++) {
470
781M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
781M
            }
472
195M
          }
473
48.8M
          block_dc *= 1.0f / 16;
474
244M
          for (size_t iy = 0; iy < 4; iy++) {
475
976M
            for (size_t ix = 0; ix < 4; ix++) {
476
781M
              if (ix == 1 && iy == 1) continue;
477
732M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
732M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
732M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
732M
            }
481
195M
          }
482
48.8M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
48.8M
          coefficients[y * 8 + x] = block_dc;
484
48.8M
        }
485
24.4M
      }
486
12.2M
      float block00 = coefficients[0];
487
12.2M
      float block01 = coefficients[1];
488
12.2M
      float block10 = coefficients[8];
489
12.2M
      float block11 = coefficients[9];
490
12.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
12.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
12.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
12.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
12.2M
      break;
495
0
    }
496
11.0M
    case Type::DCT8X4: {
497
33.2M
      for (size_t x = 0; x < 2; x++) {
498
22.1M
        HWY_ALIGN float block[4 * 8];
499
22.1M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
22.1M
                                 scratch_space);
501
110M
        for (size_t iy = 0; iy < 4; iy++) {
502
798M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
709M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
709M
          }
506
88.7M
        }
507
22.1M
      }
508
11.0M
      float block0 = coefficients[0];
509
11.0M
      float block1 = coefficients[8];
510
11.0M
      coefficients[0] = (block0 + block1) * 0.5f;
511
11.0M
      coefficients[8] = (block0 - block1) * 0.5f;
512
11.0M
      break;
513
0
    }
514
10.9M
    case Type::DCT4X8: {
515
32.8M
      for (size_t y = 0; y < 2; y++) {
516
21.8M
        HWY_ALIGN float block[4 * 8];
517
21.8M
        ComputeScaledDCT<4, 8>()(
518
21.8M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
21.8M
            scratch_space);
520
109M
        for (size_t iy = 0; iy < 4; iy++) {
521
787M
          for (size_t ix = 0; ix < 8; ix++) {
522
700M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
700M
          }
524
87.5M
        }
525
21.8M
      }
526
10.9M
      float block0 = coefficients[0];
527
10.9M
      float block1 = coefficients[8];
528
10.9M
      coefficients[0] = (block0 + block1) * 0.5f;
529
10.9M
      coefficients[8] = (block0 - block1) * 0.5f;
530
10.9M
      break;
531
0
    }
532
10.8M
    case Type::DCT4X4: {
533
32.4M
      for (size_t y = 0; y < 2; y++) {
534
64.9M
        for (size_t x = 0; x < 2; x++) {
535
43.3M
          HWY_ALIGN float block[4 * 4];
536
43.3M
          ComputeScaledDCT<4, 4>()(
537
43.3M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
43.3M
              block, scratch_space);
539
216M
          for (size_t iy = 0; iy < 4; iy++) {
540
866M
            for (size_t ix = 0; ix < 4; ix++) {
541
692M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
692M
            }
543
173M
          }
544
43.3M
        }
545
21.6M
      }
546
10.8M
      float block00 = coefficients[0];
547
10.8M
      float block01 = coefficients[1];
548
10.8M
      float block10 = coefficients[8];
549
10.8M
      float block11 = coefficients[9];
550
10.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
10.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
10.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
10.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
10.8M
      break;
555
0
    }
556
14.6M
    case Type::DCT2X2: {
557
14.6M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
14.6M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
14.6M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
14.6M
      break;
561
0
    }
562
4.80M
    case Type::DCT16X16: {
563
4.80M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.80M
                                 scratch_space);
565
4.80M
      break;
566
0
    }
567
9.32M
    case Type::DCT16X8: {
568
9.32M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
9.32M
                                scratch_space);
570
9.32M
      break;
571
0
    }
572
9.33M
    case Type::DCT8X16: {
573
9.33M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
9.33M
                                scratch_space);
575
9.33M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.83M
    case Type::DCT32X16: {
588
1.83M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.83M
                                 scratch_space);
590
1.83M
      break;
591
0
    }
592
1.81M
    case Type::DCT16X32: {
593
1.81M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.81M
                                 scratch_space);
595
1.81M
      break;
596
0
    }
597
1.02M
    case Type::DCT32X32: {
598
1.02M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.02M
                                 scratch_space);
600
1.02M
      break;
601
0
    }
602
24.2M
    case Type::DCT: {
603
24.2M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
24.2M
                               scratch_space);
605
24.2M
      break;
606
0
    }
607
11.0M
    case Type::AFV0: {
608
11.0M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
11.0M
      break;
610
0
    }
611
10.9M
    case Type::AFV1: {
612
10.9M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
10.9M
      break;
614
0
    }
615
10.9M
    case Type::AFV2: {
616
10.9M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
10.9M
      break;
618
0
    }
619
10.9M
    case Type::AFV3: {
620
10.9M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
10.9M
      break;
622
0
    }
623
229k
    case Type::DCT64X64: {
624
229k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
229k
                                 scratch_space);
626
229k
      break;
627
0
    }
628
553k
    case Type::DCT64X32: {
629
553k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
553k
                                 scratch_space);
631
553k
      break;
632
0
    }
633
333k
    case Type::DCT32X64: {
634
333k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
333k
                                 scratch_space);
636
333k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
157M
  }
669
157M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
5.16M
                                          float* JXL_RESTRICT scratch_space) {
462
5.16M
  using Type = AcStrategyType;
463
5.16M
  switch (strategy) {
464
688k
    case Type::IDENTITY: {
465
2.06M
      for (size_t y = 0; y < 2; y++) {
466
4.13M
        for (size_t x = 0; x < 2; x++) {
467
2.75M
          float block_dc = 0;
468
13.7M
          for (size_t iy = 0; iy < 4; iy++) {
469
55.0M
            for (size_t ix = 0; ix < 4; ix++) {
470
44.0M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
44.0M
            }
472
11.0M
          }
473
2.75M
          block_dc *= 1.0f / 16;
474
13.7M
          for (size_t iy = 0; iy < 4; iy++) {
475
55.0M
            for (size_t ix = 0; ix < 4; ix++) {
476
44.0M
              if (ix == 1 && iy == 1) continue;
477
41.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
41.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
41.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
41.3M
            }
481
11.0M
          }
482
2.75M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.75M
          coefficients[y * 8 + x] = block_dc;
484
2.75M
        }
485
1.37M
      }
486
688k
      float block00 = coefficients[0];
487
688k
      float block01 = coefficients[1];
488
688k
      float block10 = coefficients[8];
489
688k
      float block11 = coefficients[9];
490
688k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
688k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
688k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
688k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
688k
      break;
495
0
    }
496
132k
    case Type::DCT8X4: {
497
396k
      for (size_t x = 0; x < 2; x++) {
498
264k
        HWY_ALIGN float block[4 * 8];
499
264k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
264k
                                 scratch_space);
501
1.32M
        for (size_t iy = 0; iy < 4; iy++) {
502
9.50M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
8.45M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
8.45M
          }
506
1.05M
        }
507
264k
      }
508
132k
      float block0 = coefficients[0];
509
132k
      float block1 = coefficients[8];
510
132k
      coefficients[0] = (block0 + block1) * 0.5f;
511
132k
      coefficients[8] = (block0 - block1) * 0.5f;
512
132k
      break;
513
0
    }
514
56.4k
    case Type::DCT4X8: {
515
169k
      for (size_t y = 0; y < 2; y++) {
516
112k
        HWY_ALIGN float block[4 * 8];
517
112k
        ComputeScaledDCT<4, 8>()(
518
112k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
112k
            scratch_space);
520
564k
        for (size_t iy = 0; iy < 4; iy++) {
521
4.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
3.61M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
3.61M
          }
524
451k
        }
525
112k
      }
526
56.4k
      float block0 = coefficients[0];
527
56.4k
      float block1 = coefficients[8];
528
56.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
56.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
56.4k
      break;
531
0
    }
532
60
    case Type::DCT4X4: {
533
180
      for (size_t y = 0; y < 2; y++) {
534
360
        for (size_t x = 0; x < 2; x++) {
535
240
          HWY_ALIGN float block[4 * 4];
536
240
          ComputeScaledDCT<4, 4>()(
537
240
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
240
              block, scratch_space);
539
1.20k
          for (size_t iy = 0; iy < 4; iy++) {
540
4.80k
            for (size_t ix = 0; ix < 4; ix++) {
541
3.84k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
3.84k
            }
543
960
          }
544
240
        }
545
120
      }
546
60
      float block00 = coefficients[0];
547
60
      float block01 = coefficients[1];
548
60
      float block10 = coefficients[8];
549
60
      float block11 = coefficients[9];
550
60
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
60
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
60
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
60
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
60
      break;
555
0
    }
556
1.91M
    case Type::DCT2X2: {
557
1.91M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.91M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.91M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.91M
      break;
561
0
    }
562
140k
    case Type::DCT16X16: {
563
140k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
140k
                                 scratch_space);
565
140k
      break;
566
0
    }
567
203k
    case Type::DCT16X8: {
568
203k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
203k
                                scratch_space);
570
203k
      break;
571
0
    }
572
218k
    case Type::DCT8X16: {
573
218k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
218k
                                scratch_space);
575
218k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
45.9k
    case Type::DCT32X16: {
588
45.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
45.9k
                                 scratch_space);
590
45.9k
      break;
591
0
    }
592
44.7k
    case Type::DCT16X32: {
593
44.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
44.7k
                                 scratch_space);
595
44.7k
      break;
596
0
    }
597
69.7k
    case Type::DCT32X32: {
598
69.7k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
69.7k
                                 scratch_space);
600
69.7k
      break;
601
0
    }
602
1.31M
    case Type::DCT: {
603
1.31M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.31M
                               scratch_space);
605
1.31M
      break;
606
0
    }
607
92.5k
    case Type::AFV0: {
608
92.5k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
92.5k
      break;
610
0
    }
611
52.6k
    case Type::AFV1: {
612
52.6k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
52.6k
      break;
614
0
    }
615
65.1k
    case Type::AFV2: {
616
65.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
65.1k
      break;
618
0
    }
619
63.0k
    case Type::AFV3: {
620
63.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
63.0k
      break;
622
0
    }
623
40.1k
    case Type::DCT64X64: {
624
40.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
40.1k
                                 scratch_space);
626
40.1k
      break;
627
0
    }
628
13.9k
    case Type::DCT64X32: {
629
13.9k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
13.9k
                                 scratch_space);
631
13.9k
      break;
632
0
    }
633
5.66k
    case Type::DCT32X64: {
634
5.66k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
5.66k
                                 scratch_space);
636
5.66k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
5.16M
  }
669
5.16M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
15.9M
                                          float* JXL_RESTRICT scratch_space) {
462
15.9M
  using Type = AcStrategyType;
463
15.9M
  switch (strategy) {
464
688k
    case Type::IDENTITY: {
465
2.06M
      for (size_t y = 0; y < 2; y++) {
466
4.13M
        for (size_t x = 0; x < 2; x++) {
467
2.75M
          float block_dc = 0;
468
13.7M
          for (size_t iy = 0; iy < 4; iy++) {
469
55.0M
            for (size_t ix = 0; ix < 4; ix++) {
470
44.0M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
44.0M
            }
472
11.0M
          }
473
2.75M
          block_dc *= 1.0f / 16;
474
13.7M
          for (size_t iy = 0; iy < 4; iy++) {
475
55.0M
            for (size_t ix = 0; ix < 4; ix++) {
476
44.0M
              if (ix == 1 && iy == 1) continue;
477
41.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
41.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
41.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
41.3M
            }
481
11.0M
          }
482
2.75M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
2.75M
          coefficients[y * 8 + x] = block_dc;
484
2.75M
        }
485
1.37M
      }
486
688k
      float block00 = coefficients[0];
487
688k
      float block01 = coefficients[1];
488
688k
      float block10 = coefficients[8];
489
688k
      float block11 = coefficients[9];
490
688k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
688k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
688k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
688k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
688k
      break;
495
0
    }
496
132k
    case Type::DCT8X4: {
497
396k
      for (size_t x = 0; x < 2; x++) {
498
264k
        HWY_ALIGN float block[4 * 8];
499
264k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
264k
                                 scratch_space);
501
1.32M
        for (size_t iy = 0; iy < 4; iy++) {
502
9.50M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
8.45M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
8.45M
          }
506
1.05M
        }
507
264k
      }
508
132k
      float block0 = coefficients[0];
509
132k
      float block1 = coefficients[8];
510
132k
      coefficients[0] = (block0 + block1) * 0.5f;
511
132k
      coefficients[8] = (block0 - block1) * 0.5f;
512
132k
      break;
513
0
    }
514
56.4k
    case Type::DCT4X8: {
515
169k
      for (size_t y = 0; y < 2; y++) {
516
112k
        HWY_ALIGN float block[4 * 8];
517
112k
        ComputeScaledDCT<4, 8>()(
518
112k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
112k
            scratch_space);
520
564k
        for (size_t iy = 0; iy < 4; iy++) {
521
4.06M
          for (size_t ix = 0; ix < 8; ix++) {
522
3.61M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
3.61M
          }
524
451k
        }
525
112k
      }
526
56.4k
      float block0 = coefficients[0];
527
56.4k
      float block1 = coefficients[8];
528
56.4k
      coefficients[0] = (block0 + block1) * 0.5f;
529
56.4k
      coefficients[8] = (block0 - block1) * 0.5f;
530
56.4k
      break;
531
0
    }
532
60
    case Type::DCT4X4: {
533
180
      for (size_t y = 0; y < 2; y++) {
534
360
        for (size_t x = 0; x < 2; x++) {
535
240
          HWY_ALIGN float block[4 * 4];
536
240
          ComputeScaledDCT<4, 4>()(
537
240
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
240
              block, scratch_space);
539
1.20k
          for (size_t iy = 0; iy < 4; iy++) {
540
4.80k
            for (size_t ix = 0; ix < 4; ix++) {
541
3.84k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
3.84k
            }
543
960
          }
544
240
        }
545
120
      }
546
60
      float block00 = coefficients[0];
547
60
      float block01 = coefficients[1];
548
60
      float block10 = coefficients[8];
549
60
      float block11 = coefficients[9];
550
60
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
60
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
60
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
60
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
60
      break;
555
0
    }
556
1.91M
    case Type::DCT2X2: {
557
1.91M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.91M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.91M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.91M
      break;
561
0
    }
562
140k
    case Type::DCT16X16: {
563
140k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
140k
                                 scratch_space);
565
140k
      break;
566
0
    }
567
203k
    case Type::DCT16X8: {
568
203k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
203k
                                scratch_space);
570
203k
      break;
571
0
    }
572
218k
    case Type::DCT8X16: {
573
218k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
218k
                                scratch_space);
575
218k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
45.9k
    case Type::DCT32X16: {
588
45.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
45.9k
                                 scratch_space);
590
45.9k
      break;
591
0
    }
592
44.7k
    case Type::DCT16X32: {
593
44.7k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
44.7k
                                 scratch_space);
595
44.7k
      break;
596
0
    }
597
69.7k
    case Type::DCT32X32: {
598
69.7k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
69.7k
                                 scratch_space);
600
69.7k
      break;
601
0
    }
602
12.1M
    case Type::DCT: {
603
12.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
12.1M
                               scratch_space);
605
12.1M
      break;
606
0
    }
607
92.5k
    case Type::AFV0: {
608
92.5k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
92.5k
      break;
610
0
    }
611
52.6k
    case Type::AFV1: {
612
52.6k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
52.6k
      break;
614
0
    }
615
65.1k
    case Type::AFV2: {
616
65.1k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
65.1k
      break;
618
0
    }
619
63.0k
    case Type::AFV3: {
620
63.0k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
63.0k
      break;
622
0
    }
623
40.1k
    case Type::DCT64X64: {
624
40.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
40.1k
                                 scratch_space);
626
40.1k
      break;
627
0
    }
628
13.9k
    case Type::DCT64X32: {
629
13.9k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
13.9k
                                 scratch_space);
631
13.9k
      break;
632
0
    }
633
5.66k
    case Type::DCT32X64: {
634
5.66k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
5.66k
                                 scratch_space);
636
5.66k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
15.9M
  }
669
15.9M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
135M
                                          float* JXL_RESTRICT scratch_space) {
462
135M
  using Type = AcStrategyType;
463
135M
  switch (strategy) {
464
10.8M
    case Type::IDENTITY: {
465
32.4M
      for (size_t y = 0; y < 2; y++) {
466
64.9M
        for (size_t x = 0; x < 2; x++) {
467
43.3M
          float block_dc = 0;
468
216M
          for (size_t iy = 0; iy < 4; iy++) {
469
866M
            for (size_t ix = 0; ix < 4; ix++) {
470
692M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
692M
            }
472
173M
          }
473
43.3M
          block_dc *= 1.0f / 16;
474
216M
          for (size_t iy = 0; iy < 4; iy++) {
475
866M
            for (size_t ix = 0; ix < 4; ix++) {
476
692M
              if (ix == 1 && iy == 1) continue;
477
649M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
649M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
649M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
649M
            }
481
173M
          }
482
43.3M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
43.3M
          coefficients[y * 8 + x] = block_dc;
484
43.3M
        }
485
21.6M
      }
486
10.8M
      float block00 = coefficients[0];
487
10.8M
      float block01 = coefficients[1];
488
10.8M
      float block10 = coefficients[8];
489
10.8M
      float block11 = coefficients[9];
490
10.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
10.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
10.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
10.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
10.8M
      break;
495
0
    }
496
10.8M
    case Type::DCT8X4: {
497
32.4M
      for (size_t x = 0; x < 2; x++) {
498
21.6M
        HWY_ALIGN float block[4 * 8];
499
21.6M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
21.6M
                                 scratch_space);
501
108M
        for (size_t iy = 0; iy < 4; iy++) {
502
779M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
692M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
692M
          }
506
86.6M
        }
507
21.6M
      }
508
10.8M
      float block0 = coefficients[0];
509
10.8M
      float block1 = coefficients[8];
510
10.8M
      coefficients[0] = (block0 + block1) * 0.5f;
511
10.8M
      coefficients[8] = (block0 - block1) * 0.5f;
512
10.8M
      break;
513
0
    }
514
10.8M
    case Type::DCT4X8: {
515
32.4M
      for (size_t y = 0; y < 2; y++) {
516
21.6M
        HWY_ALIGN float block[4 * 8];
517
21.6M
        ComputeScaledDCT<4, 8>()(
518
21.6M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
21.6M
            scratch_space);
520
108M
        for (size_t iy = 0; iy < 4; iy++) {
521
779M
          for (size_t ix = 0; ix < 8; ix++) {
522
692M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
692M
          }
524
86.6M
        }
525
21.6M
      }
526
10.8M
      float block0 = coefficients[0];
527
10.8M
      float block1 = coefficients[8];
528
10.8M
      coefficients[0] = (block0 + block1) * 0.5f;
529
10.8M
      coefficients[8] = (block0 - block1) * 0.5f;
530
10.8M
      break;
531
0
    }
532
10.8M
    case Type::DCT4X4: {
533
32.4M
      for (size_t y = 0; y < 2; y++) {
534
64.9M
        for (size_t x = 0; x < 2; x++) {
535
43.3M
          HWY_ALIGN float block[4 * 4];
536
43.3M
          ComputeScaledDCT<4, 4>()(
537
43.3M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
43.3M
              block, scratch_space);
539
216M
          for (size_t iy = 0; iy < 4; iy++) {
540
866M
            for (size_t ix = 0; ix < 4; ix++) {
541
692M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
692M
            }
543
173M
          }
544
43.3M
        }
545
21.6M
      }
546
10.8M
      float block00 = coefficients[0];
547
10.8M
      float block01 = coefficients[1];
548
10.8M
      float block10 = coefficients[8];
549
10.8M
      float block11 = coefficients[9];
550
10.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
10.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
10.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
10.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
10.8M
      break;
555
0
    }
556
10.8M
    case Type::DCT2X2: {
557
10.8M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
10.8M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
10.8M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
10.8M
      break;
561
0
    }
562
4.51M
    case Type::DCT16X16: {
563
4.51M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.51M
                                 scratch_space);
565
4.51M
      break;
566
0
    }
567
8.91M
    case Type::DCT16X8: {
568
8.91M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
8.91M
                                scratch_space);
570
8.91M
      break;
571
0
    }
572
8.89M
    case Type::DCT8X16: {
573
8.89M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
8.89M
                                scratch_space);
575
8.89M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.74M
    case Type::DCT32X16: {
588
1.74M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.74M
                                 scratch_space);
590
1.74M
      break;
591
0
    }
592
1.72M
    case Type::DCT16X32: {
593
1.72M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.72M
                                 scratch_space);
595
1.72M
      break;
596
0
    }
597
881k
    case Type::DCT32X32: {
598
881k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
881k
                                 scratch_space);
600
881k
      break;
601
0
    }
602
10.8M
    case Type::DCT: {
603
10.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
10.8M
                               scratch_space);
605
10.8M
      break;
606
0
    }
607
10.8M
    case Type::AFV0: {
608
10.8M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
10.8M
      break;
610
0
    }
611
10.8M
    case Type::AFV1: {
612
10.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
10.8M
      break;
614
0
    }
615
10.8M
    case Type::AFV2: {
616
10.8M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
10.8M
      break;
618
0
    }
619
10.8M
    case Type::AFV3: {
620
10.8M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
10.8M
      break;
622
0
    }
623
148k
    case Type::DCT64X64: {
624
148k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
148k
                                 scratch_space);
626
148k
      break;
627
0
    }
628
525k
    case Type::DCT64X32: {
629
525k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
525k
                                 scratch_space);
631
525k
      break;
632
0
    }
633
321k
    case Type::DCT32X64: {
634
321k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
321k
                                 scratch_space);
636
321k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
135M
  }
669
135M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
21.1M
                                              float* scratch_space) {
676
21.1M
  using Type = AcStrategyType;
677
21.1M
  switch (strategy) {
678
407k
    case Type::DCT16X8: {
679
407k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
407k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
407k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
407k
      break;
683
0
    }
684
437k
    case Type::DCT8X16: {
685
437k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
437k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
437k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
437k
      break;
689
0
    }
690
281k
    case Type::DCT16X16: {
691
281k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
281k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
281k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
281k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
91.8k
    case Type::DCT32X16: {
709
91.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
91.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
91.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
91.8k
      break;
713
0
    }
714
89.5k
    case Type::DCT16X32: {
715
89.5k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
89.5k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
89.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
89.5k
      break;
719
0
    }
720
139k
    case Type::DCT32X32: {
721
139k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
139k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
139k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
139k
      break;
725
0
    }
726
27.8k
    case Type::DCT64X32: {
727
27.8k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
27.8k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
27.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
27.8k
      break;
731
0
    }
732
11.3k
    case Type::DCT32X64: {
733
11.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
11.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
11.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
11.3k
      break;
737
0
    }
738
80.3k
    case Type::DCT64X64: {
739
80.3k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
80.3k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
80.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
80.3k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
13.4M
    case Type::DCT:
787
17.2M
    case Type::DCT2X2:
788
17.2M
    case Type::DCT4X4:
789
17.3M
    case Type::DCT4X8:
790
17.6M
    case Type::DCT8X4:
791
17.8M
    case Type::AFV0:
792
17.9M
    case Type::AFV1:
793
18.0M
    case Type::AFV2:
794
18.2M
    case Type::AFV3:
795
19.5M
    case Type::IDENTITY:
796
19.5M
      dc[0] = block[0];
797
19.5M
      break;
798
21.1M
  }
799
21.1M
}
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
5.16M
                                              float* scratch_space) {
676
5.16M
  using Type = AcStrategyType;
677
5.16M
  switch (strategy) {
678
203k
    case Type::DCT16X8: {
679
203k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
203k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
203k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
203k
      break;
683
0
    }
684
218k
    case Type::DCT8X16: {
685
218k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
218k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
218k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
218k
      break;
689
0
    }
690
140k
    case Type::DCT16X16: {
691
140k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
140k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
140k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
140k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
45.9k
    case Type::DCT32X16: {
709
45.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
45.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
45.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
45.9k
      break;
713
0
    }
714
44.7k
    case Type::DCT16X32: {
715
44.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
44.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
44.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
44.7k
      break;
719
0
    }
720
69.7k
    case Type::DCT32X32: {
721
69.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
69.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
69.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
69.7k
      break;
725
0
    }
726
13.9k
    case Type::DCT64X32: {
727
13.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
13.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
13.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
13.9k
      break;
731
0
    }
732
5.66k
    case Type::DCT32X64: {
733
5.66k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
5.66k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
5.66k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
5.66k
      break;
737
0
    }
738
40.1k
    case Type::DCT64X64: {
739
40.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
40.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
40.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
40.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.31M
    case Type::DCT:
787
3.22M
    case Type::DCT2X2:
788
3.22M
    case Type::DCT4X4:
789
3.28M
    case Type::DCT4X8:
790
3.41M
    case Type::DCT8X4:
791
3.51M
    case Type::AFV0:
792
3.56M
    case Type::AFV1:
793
3.62M
    case Type::AFV2:
794
3.69M
    case Type::AFV3:
795
4.38M
    case Type::IDENTITY:
796
4.38M
      dc[0] = block[0];
797
4.38M
      break;
798
5.16M
  }
799
5.16M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
15.9M
                                              float* scratch_space) {
676
15.9M
  using Type = AcStrategyType;
677
15.9M
  switch (strategy) {
678
203k
    case Type::DCT16X8: {
679
203k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
203k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
203k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
203k
      break;
683
0
    }
684
218k
    case Type::DCT8X16: {
685
218k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
218k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
218k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
218k
      break;
689
0
    }
690
140k
    case Type::DCT16X16: {
691
140k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
140k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
140k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
140k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
45.9k
    case Type::DCT32X16: {
709
45.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
45.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
45.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
45.9k
      break;
713
0
    }
714
44.7k
    case Type::DCT16X32: {
715
44.7k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
44.7k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
44.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
44.7k
      break;
719
0
    }
720
69.7k
    case Type::DCT32X32: {
721
69.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
69.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
69.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
69.7k
      break;
725
0
    }
726
13.9k
    case Type::DCT64X32: {
727
13.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
13.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
13.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
13.9k
      break;
731
0
    }
732
5.66k
    case Type::DCT32X64: {
733
5.66k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
5.66k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
5.66k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
5.66k
      break;
737
0
    }
738
40.1k
    case Type::DCT64X64: {
739
40.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
40.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
40.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
40.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
12.1M
    case Type::DCT:
787
14.0M
    case Type::DCT2X2:
788
14.0M
    case Type::DCT4X4:
789
14.1M
    case Type::DCT4X8:
790
14.2M
    case Type::DCT8X4:
791
14.3M
    case Type::AFV0:
792
14.3M
    case Type::AFV1:
793
14.4M
    case Type::AFV2:
794
14.5M
    case Type::AFV3:
795
15.2M
    case Type::IDENTITY:
796
15.2M
      dc[0] = block[0];
797
15.2M
      break;
798
15.9M
  }
799
15.9M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_