Coverage Report

Created: 2025-12-31 07:53

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.92M
                                   const size_t output_stride, float* scratch) {
40
2.92M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.92M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.92M
  float* block = scratch;
43
2.92M
  if (ROWS < COLS) {
44
2.78M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
6.38M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.81M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.81M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.81M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.81M
      }
50
1.57M
    }
51
1.70M
  } else {
52
5.72M
    for (size_t y = 0; y < LF_COLS; y++) {
53
19.7M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
15.7M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
15.7M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
15.7M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
15.7M
      }
58
4.01M
    }
59
1.70M
  }
60
61
2.92M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.92M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.92M
                                  scratch_space);
64
2.92M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
328k
                                   const size_t output_stride, float* scratch) {
40
328k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
328k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
328k
  float* block = scratch;
43
328k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
328k
  } else {
52
656k
    for (size_t y = 0; y < LF_COLS; y++) {
53
984k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
656k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
656k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
656k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
656k
      }
58
328k
    }
59
328k
  }
60
61
328k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
328k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
328k
                                  scratch_space);
64
328k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
454k
                                   const size_t output_stride, float* scratch) {
40
454k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
454k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
454k
  float* block = scratch;
43
454k
  if (ROWS < COLS) {
44
909k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.36M
      for (size_t x = 0; x < LF_COLS; x++) {
46
909k
        block[y * COLS + x] = input[y * input_stride + x] *
47
909k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
909k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
909k
      }
50
454k
    }
51
454k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
454k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
454k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
454k
                                  scratch_space);
64
454k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
193k
                                   const size_t output_stride, float* scratch) {
40
193k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
193k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
193k
  float* block = scratch;
43
193k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
193k
  } else {
52
579k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.15M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
772k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
772k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
772k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
772k
      }
58
386k
    }
59
193k
  }
60
61
193k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
193k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
193k
                                  scratch_space);
64
193k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
89.7k
                                   const size_t output_stride, float* scratch) {
40
89.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
89.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
89.7k
  float* block = scratch;
43
89.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
89.7k
  } else {
52
269k
    for (size_t y = 0; y < LF_COLS; y++) {
53
897k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
717k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
717k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
717k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
717k
      }
58
179k
    }
59
89.7k
  }
60
61
89.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
89.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
89.7k
                                  scratch_space);
64
89.7k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
143k
                                   const size_t output_stride, float* scratch) {
40
143k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
143k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
143k
  float* block = scratch;
43
143k
  if (ROWS < COLS) {
44
431k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.43M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.14M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.14M
      }
50
287k
    }
51
143k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
143k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
143k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
143k
                                  scratch_space);
64
143k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
198k
                                   const size_t output_stride, float* scratch) {
40
198k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
198k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
198k
  float* block = scratch;
43
198k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
198k
  } else {
52
993k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.97M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.18M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.18M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.18M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.18M
      }
58
795k
    }
59
198k
  }
60
61
198k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
198k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
198k
                                  scratch_space);
64
198k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.19k
                                   const size_t output_stride, float* scratch) {
40
6.19k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.19k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.19k
  float* block = scratch;
43
6.19k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.19k
  } else {
52
30.9k
    for (size_t y = 0; y < LF_COLS; y++) {
53
223k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
198k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
198k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
198k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
198k
      }
58
24.7k
    }
59
6.19k
  }
60
61
6.19k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.19k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.19k
                                  scratch_space);
64
6.19k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.9k
                                   const size_t output_stride, float* scratch) {
40
10.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.9k
  float* block = scratch;
43
10.9k
  if (ROWS < COLS) {
44
54.6k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
393k
      for (size_t x = 0; x < LF_COLS; x++) {
46
349k
        block[y * COLS + x] = input[y * input_stride + x] *
47
349k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
349k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
349k
      }
50
43.7k
    }
51
10.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.9k
                                  scratch_space);
64
10.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
36.8k
                                   const size_t output_stride, float* scratch) {
40
36.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
36.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
36.8k
  float* block = scratch;
43
36.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
36.8k
  } else {
52
331k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.65M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.35M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.35M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.35M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.35M
      }
58
294k
    }
59
36.8k
  }
60
61
36.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
36.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
36.8k
                                  scratch_space);
64
36.8k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
328k
                                   const size_t output_stride, float* scratch) {
40
328k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
328k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
328k
  float* block = scratch;
43
328k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
328k
  } else {
52
656k
    for (size_t y = 0; y < LF_COLS; y++) {
53
984k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
656k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
656k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
656k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
656k
      }
58
328k
    }
59
328k
  }
60
61
328k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
328k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
328k
                                  scratch_space);
64
328k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
454k
                                   const size_t output_stride, float* scratch) {
40
454k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
454k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
454k
  float* block = scratch;
43
454k
  if (ROWS < COLS) {
44
909k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.36M
      for (size_t x = 0; x < LF_COLS; x++) {
46
909k
        block[y * COLS + x] = input[y * input_stride + x] *
47
909k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
909k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
909k
      }
50
454k
    }
51
454k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
454k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
454k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
454k
                                  scratch_space);
64
454k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
193k
                                   const size_t output_stride, float* scratch) {
40
193k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
193k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
193k
  float* block = scratch;
43
193k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
193k
  } else {
52
579k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.15M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
772k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
772k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
772k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
772k
      }
58
386k
    }
59
193k
  }
60
61
193k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
193k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
193k
                                  scratch_space);
64
193k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
89.7k
                                   const size_t output_stride, float* scratch) {
40
89.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
89.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
89.7k
  float* block = scratch;
43
89.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
89.7k
  } else {
52
269k
    for (size_t y = 0; y < LF_COLS; y++) {
53
897k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
717k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
717k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
717k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
717k
      }
58
179k
    }
59
89.7k
  }
60
61
89.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
89.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
89.7k
                                  scratch_space);
64
89.7k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
143k
                                   const size_t output_stride, float* scratch) {
40
143k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
143k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
143k
  float* block = scratch;
43
143k
  if (ROWS < COLS) {
44
431k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.43M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.14M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.14M
      }
50
287k
    }
51
143k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
143k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
143k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
143k
                                  scratch_space);
64
143k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
198k
                                   const size_t output_stride, float* scratch) {
40
198k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
198k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
198k
  float* block = scratch;
43
198k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
198k
  } else {
52
993k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.97M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.18M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.18M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.18M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.18M
      }
58
795k
    }
59
198k
  }
60
61
198k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
198k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
198k
                                  scratch_space);
64
198k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.19k
                                   const size_t output_stride, float* scratch) {
40
6.19k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.19k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.19k
  float* block = scratch;
43
6.19k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.19k
  } else {
52
30.9k
    for (size_t y = 0; y < LF_COLS; y++) {
53
223k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
198k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
198k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
198k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
198k
      }
58
24.7k
    }
59
6.19k
  }
60
61
6.19k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.19k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.19k
                                  scratch_space);
64
6.19k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.9k
                                   const size_t output_stride, float* scratch) {
40
10.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.9k
  float* block = scratch;
43
10.9k
  if (ROWS < COLS) {
44
54.6k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
393k
      for (size_t x = 0; x < LF_COLS; x++) {
46
349k
        block[y * COLS + x] = input[y * input_stride + x] *
47
349k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
349k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
349k
      }
50
43.7k
    }
51
10.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.9k
                                  scratch_space);
64
10.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
36.8k
                                   const size_t output_stride, float* scratch) {
40
36.8k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
36.8k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
36.8k
  float* block = scratch;
43
36.8k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
36.8k
  } else {
52
331k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.65M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.35M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.35M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.35M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.35M
      }
58
294k
    }
59
36.8k
  }
60
61
36.8k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
36.8k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
36.8k
                                  scratch_space);
64
36.8k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
60.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
60.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
60.6M
  static_assert(S % 2 == 0, "S should be even");
70
60.6M
  float temp[kDCTBlockSize];
71
60.6M
  constexpr size_t num_2x2 = S / 2;
72
202M
  for (size_t y = 0; y < num_2x2; y++) {
73
566M
    for (size_t x = 0; x < num_2x2; x++) {
74
424M
      float c00 = block[y * 2 * stride + x * 2];
75
424M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
424M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
424M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
424M
      float r00 = c00 + c01 + c10 + c11;
79
424M
      float r01 = c00 + c01 - c10 - c11;
80
424M
      float r10 = c00 - c01 + c10 - c11;
81
424M
      float r11 = c00 - c01 - c10 + c11;
82
424M
      r00 *= 0.25f;
83
424M
      r01 *= 0.25f;
84
424M
      r10 *= 0.25f;
85
424M
      r11 *= 0.25f;
86
424M
      temp[y * kBlockDim + x] = r00;
87
424M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
424M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
424M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
424M
    }
91
141M
  }
92
343M
  for (size_t y = 0; y < S; y++) {
93
1.98G
    for (size_t x = 0; x < S; x++) {
94
1.69G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.69G
    }
96
283M
  }
97
60.6M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
5.65M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.0M
      float c00 = block[y * 2 * stride + x * 2];
75
18.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.0M
      float r00 = c00 + c01 + c10 + c11;
79
18.0M
      float r01 = c00 + c01 - c10 - c11;
80
18.0M
      float r10 = c00 - c01 + c10 - c11;
81
18.0M
      float r11 = c00 - c01 - c10 + c11;
82
18.0M
      r00 *= 0.25f;
83
18.0M
      r01 *= 0.25f;
84
18.0M
      r10 *= 0.25f;
85
18.0M
      r11 *= 0.25f;
86
18.0M
      temp[y * kBlockDim + x] = r00;
87
18.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.0M
    }
91
4.52M
  }
92
10.1M
  for (size_t y = 0; y < S; y++) {
93
81.3M
    for (size_t x = 0; x < S; x++) {
94
72.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
72.3M
    }
96
9.04M
  }
97
1.13M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
3.39M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.78M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.52M
      float c00 = block[y * 2 * stride + x * 2];
75
4.52M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.52M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.52M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.52M
      float r00 = c00 + c01 + c10 + c11;
79
4.52M
      float r01 = c00 + c01 - c10 - c11;
80
4.52M
      float r10 = c00 - c01 + c10 - c11;
81
4.52M
      float r11 = c00 - c01 - c10 + c11;
82
4.52M
      r00 *= 0.25f;
83
4.52M
      r01 *= 0.25f;
84
4.52M
      r10 *= 0.25f;
85
4.52M
      r11 *= 0.25f;
86
4.52M
      temp[y * kBlockDim + x] = r00;
87
4.52M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.52M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.52M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.52M
    }
91
2.26M
  }
92
5.65M
  for (size_t y = 0; y < S; y++) {
93
22.6M
    for (size_t x = 0; x < S; x++) {
94
18.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.0M
    }
96
4.52M
  }
97
1.13M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
2.26M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.26M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.13M
      float c00 = block[y * 2 * stride + x * 2];
75
1.13M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.13M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.13M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.13M
      float r00 = c00 + c01 + c10 + c11;
79
1.13M
      float r01 = c00 + c01 - c10 - c11;
80
1.13M
      float r10 = c00 - c01 + c10 - c11;
81
1.13M
      float r11 = c00 - c01 - c10 + c11;
82
1.13M
      r00 *= 0.25f;
83
1.13M
      r01 *= 0.25f;
84
1.13M
      r10 *= 0.25f;
85
1.13M
      r11 *= 0.25f;
86
1.13M
      temp[y * kBlockDim + x] = r00;
87
1.13M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.13M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.13M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.13M
    }
91
1.13M
  }
92
3.39M
  for (size_t y = 0; y < S; y++) {
93
6.78M
    for (size_t x = 0; x < S; x++) {
94
4.52M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.52M
    }
96
2.26M
  }
97
1.13M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.9M
  static_assert(S % 2 == 0, "S should be even");
70
17.9M
  float temp[kDCTBlockSize];
71
17.9M
  constexpr size_t num_2x2 = S / 2;
72
89.7M
  for (size_t y = 0; y < num_2x2; y++) {
73
359M
    for (size_t x = 0; x < num_2x2; x++) {
74
287M
      float c00 = block[y * 2 * stride + x * 2];
75
287M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
287M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
287M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
287M
      float r00 = c00 + c01 + c10 + c11;
79
287M
      float r01 = c00 + c01 - c10 - c11;
80
287M
      float r10 = c00 - c01 + c10 - c11;
81
287M
      float r11 = c00 - c01 - c10 + c11;
82
287M
      r00 *= 0.25f;
83
287M
      r01 *= 0.25f;
84
287M
      r10 *= 0.25f;
85
287M
      r11 *= 0.25f;
86
287M
      temp[y * kBlockDim + x] = r00;
87
287M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
287M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
287M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
287M
    }
91
71.8M
  }
92
161M
  for (size_t y = 0; y < S; y++) {
93
1.29G
    for (size_t x = 0; x < S; x++) {
94
1.14G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.14G
    }
96
143M
  }
97
17.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.9M
  static_assert(S % 2 == 0, "S should be even");
70
17.9M
  float temp[kDCTBlockSize];
71
17.9M
  constexpr size_t num_2x2 = S / 2;
72
53.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
107M
    for (size_t x = 0; x < num_2x2; x++) {
74
71.8M
      float c00 = block[y * 2 * stride + x * 2];
75
71.8M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
71.8M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
71.8M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
71.8M
      float r00 = c00 + c01 + c10 + c11;
79
71.8M
      float r01 = c00 + c01 - c10 - c11;
80
71.8M
      float r10 = c00 - c01 + c10 - c11;
81
71.8M
      float r11 = c00 - c01 - c10 + c11;
82
71.8M
      r00 *= 0.25f;
83
71.8M
      r01 *= 0.25f;
84
71.8M
      r10 *= 0.25f;
85
71.8M
      r11 *= 0.25f;
86
71.8M
      temp[y * kBlockDim + x] = r00;
87
71.8M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
71.8M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
71.8M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
71.8M
    }
91
35.9M
  }
92
89.7M
  for (size_t y = 0; y < S; y++) {
93
359M
    for (size_t x = 0; x < S; x++) {
94
287M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
287M
    }
96
71.8M
  }
97
17.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.9M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.9M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.9M
  static_assert(S % 2 == 0, "S should be even");
70
17.9M
  float temp[kDCTBlockSize];
71
17.9M
  constexpr size_t num_2x2 = S / 2;
72
35.9M
  for (size_t y = 0; y < num_2x2; y++) {
73
35.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
17.9M
      float c00 = block[y * 2 * stride + x * 2];
75
17.9M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
17.9M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
17.9M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
17.9M
      float r00 = c00 + c01 + c10 + c11;
79
17.9M
      float r01 = c00 + c01 - c10 - c11;
80
17.9M
      float r10 = c00 - c01 + c10 - c11;
81
17.9M
      float r11 = c00 - c01 - c10 + c11;
82
17.9M
      r00 *= 0.25f;
83
17.9M
      r01 *= 0.25f;
84
17.9M
      r10 *= 0.25f;
85
17.9M
      r11 *= 0.25f;
86
17.9M
      temp[y * kBlockDim + x] = r00;
87
17.9M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
17.9M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
17.9M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
17.9M
    }
91
17.9M
  }
92
53.8M
  for (size_t y = 0; y < S; y++) {
93
107M
    for (size_t x = 0; x < S; x++) {
94
71.8M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
71.8M
    }
96
35.9M
  }
97
17.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
5.65M
  for (size_t y = 0; y < num_2x2; y++) {
73
22.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.0M
      float c00 = block[y * 2 * stride + x * 2];
75
18.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.0M
      float r00 = c00 + c01 + c10 + c11;
79
18.0M
      float r01 = c00 + c01 - c10 - c11;
80
18.0M
      float r10 = c00 - c01 + c10 - c11;
81
18.0M
      float r11 = c00 - c01 - c10 + c11;
82
18.0M
      r00 *= 0.25f;
83
18.0M
      r01 *= 0.25f;
84
18.0M
      r10 *= 0.25f;
85
18.0M
      r11 *= 0.25f;
86
18.0M
      temp[y * kBlockDim + x] = r00;
87
18.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.0M
    }
91
4.52M
  }
92
10.1M
  for (size_t y = 0; y < S; y++) {
93
81.3M
    for (size_t x = 0; x < S; x++) {
94
72.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
72.3M
    }
96
9.04M
  }
97
1.13M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
3.39M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.78M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.52M
      float c00 = block[y * 2 * stride + x * 2];
75
4.52M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.52M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.52M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.52M
      float r00 = c00 + c01 + c10 + c11;
79
4.52M
      float r01 = c00 + c01 - c10 - c11;
80
4.52M
      float r10 = c00 - c01 + c10 - c11;
81
4.52M
      float r11 = c00 - c01 - c10 + c11;
82
4.52M
      r00 *= 0.25f;
83
4.52M
      r01 *= 0.25f;
84
4.52M
      r10 *= 0.25f;
85
4.52M
      r11 *= 0.25f;
86
4.52M
      temp[y * kBlockDim + x] = r00;
87
4.52M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.52M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.52M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.52M
    }
91
2.26M
  }
92
5.65M
  for (size_t y = 0; y < S; y++) {
93
22.6M
    for (size_t x = 0; x < S; x++) {
94
18.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.0M
    }
96
4.52M
  }
97
1.13M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.13M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.13M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.13M
  static_assert(S % 2 == 0, "S should be even");
70
1.13M
  float temp[kDCTBlockSize];
71
1.13M
  constexpr size_t num_2x2 = S / 2;
72
2.26M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.26M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.13M
      float c00 = block[y * 2 * stride + x * 2];
75
1.13M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.13M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.13M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.13M
      float r00 = c00 + c01 + c10 + c11;
79
1.13M
      float r01 = c00 + c01 - c10 - c11;
80
1.13M
      float r10 = c00 - c01 + c10 - c11;
81
1.13M
      float r11 = c00 - c01 - c10 + c11;
82
1.13M
      r00 *= 0.25f;
83
1.13M
      r01 *= 0.25f;
84
1.13M
      r10 *= 0.25f;
85
1.13M
      r11 *= 0.25f;
86
1.13M
      temp[y * kBlockDim + x] = r00;
87
1.13M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.13M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.13M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.13M
    }
91
1.13M
  }
92
3.39M
  for (size_t y = 0; y < S; y++) {
93
6.78M
    for (size_t x = 0; x < S; x++) {
94
4.52M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.52M
    }
96
2.26M
  }
97
1.13M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
74.6M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
74.6M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
74.6M
      {
102
74.6M
          0.2500000000000000,
103
74.6M
          0.8769029297991420f,
104
74.6M
          0.0000000000000000,
105
74.6M
          0.0000000000000000,
106
74.6M
          0.0000000000000000,
107
74.6M
          -0.4105377591765233f,
108
74.6M
          0.0000000000000000,
109
74.6M
          0.0000000000000000,
110
74.6M
          0.0000000000000000,
111
74.6M
          0.0000000000000000,
112
74.6M
          0.0000000000000000,
113
74.6M
          0.0000000000000000,
114
74.6M
          0.0000000000000000,
115
74.6M
          0.0000000000000000,
116
74.6M
          0.0000000000000000,
117
74.6M
          0.0000000000000000,
118
74.6M
      },
119
74.6M
      {
120
74.6M
          0.2500000000000000,
121
74.6M
          0.2206518106944235f,
122
74.6M
          0.0000000000000000,
123
74.6M
          0.0000000000000000,
124
74.6M
          -0.7071067811865474f,
125
74.6M
          0.6235485373547691f,
126
74.6M
          0.0000000000000000,
127
74.6M
          0.0000000000000000,
128
74.6M
          0.0000000000000000,
129
74.6M
          0.0000000000000000,
130
74.6M
          0.0000000000000000,
131
74.6M
          0.0000000000000000,
132
74.6M
          0.0000000000000000,
133
74.6M
          0.0000000000000000,
134
74.6M
          0.0000000000000000,
135
74.6M
          0.0000000000000000,
136
74.6M
      },
137
74.6M
      {
138
74.6M
          0.2500000000000000,
139
74.6M
          -0.1014005039375376f,
140
74.6M
          0.4067007583026075f,
141
74.6M
          -0.2125574805828875f,
142
74.6M
          0.0000000000000000,
143
74.6M
          -0.0643507165794627f,
144
74.6M
          -0.4517556589999482f,
145
74.6M
          -0.3046847507248690f,
146
74.6M
          0.3017929516615495f,
147
74.6M
          0.4082482904638627f,
148
74.6M
          0.1747866975480809f,
149
74.6M
          -0.2110560104933578f,
150
74.6M
          -0.1426608480880726f,
151
74.6M
          -0.1381354035075859f,
152
74.6M
          -0.1743760259965107f,
153
74.6M
          0.1135498731499434f,
154
74.6M
      },
155
74.6M
      {
156
74.6M
          0.2500000000000000,
157
74.6M
          -0.1014005039375375f,
158
74.6M
          0.4444481661973445f,
159
74.6M
          0.3085497062849767f,
160
74.6M
          0.0000000000000000f,
161
74.6M
          -0.0643507165794627f,
162
74.6M
          0.1585450355184006f,
163
74.6M
          0.5112616136591823f,
164
74.6M
          0.2579236279634118f,
165
74.6M
          0.0000000000000000,
166
74.6M
          0.0812611176717539f,
167
74.6M
          0.1856718091610980f,
168
74.6M
          -0.3416446842253372f,
169
74.6M
          0.3302282550303788f,
170
74.6M
          0.0702790691196284f,
171
74.6M
          -0.0741750459581035f,
172
74.6M
      },
173
74.6M
      {
174
74.6M
          0.2500000000000000,
175
74.6M
          0.2206518106944236f,
176
74.6M
          0.0000000000000000,
177
74.6M
          0.0000000000000000,
178
74.6M
          0.7071067811865476f,
179
74.6M
          0.6235485373547694f,
180
74.6M
          0.0000000000000000,
181
74.6M
          0.0000000000000000,
182
74.6M
          0.0000000000000000,
183
74.6M
          0.0000000000000000,
184
74.6M
          0.0000000000000000,
185
74.6M
          0.0000000000000000,
186
74.6M
          0.0000000000000000,
187
74.6M
          0.0000000000000000,
188
74.6M
          0.0000000000000000,
189
74.6M
          0.0000000000000000,
190
74.6M
      },
191
74.6M
      {
192
74.6M
          0.2500000000000000,
193
74.6M
          -0.1014005039375378f,
194
74.6M
          0.0000000000000000,
195
74.6M
          0.4706702258572536f,
196
74.6M
          0.0000000000000000,
197
74.6M
          -0.0643507165794628f,
198
74.6M
          -0.0403851516082220f,
199
74.6M
          0.0000000000000000,
200
74.6M
          0.1627234014286620f,
201
74.6M
          0.0000000000000000,
202
74.6M
          0.0000000000000000,
203
74.6M
          0.0000000000000000,
204
74.6M
          0.7367497537172237f,
205
74.6M
          0.0875511500058708f,
206
74.6M
          -0.2921026642334881f,
207
74.6M
          0.1940289303259434f,
208
74.6M
      },
209
74.6M
      {
210
74.6M
          0.2500000000000000,
211
74.6M
          -0.1014005039375377f,
212
74.6M
          0.1957439937204294f,
213
74.6M
          -0.1621205195722993f,
214
74.6M
          0.0000000000000000,
215
74.6M
          -0.0643507165794628f,
216
74.6M
          0.0074182263792424f,
217
74.6M
          -0.2904801297289980f,
218
74.6M
          0.0952002265347504f,
219
74.6M
          0.0000000000000000,
220
74.6M
          -0.3675398009862027f,
221
74.6M
          0.4921585901373873f,
222
74.6M
          0.2462710772207515f,
223
74.6M
          -0.0794670660590957f,
224
74.6M
          0.3623817333531167f,
225
74.6M
          -0.4351904965232280f,
226
74.6M
      },
227
74.6M
      {
228
74.6M
          0.2500000000000000,
229
74.6M
          -0.1014005039375376f,
230
74.6M
          0.2929100136981264f,
231
74.6M
          0.0000000000000000,
232
74.6M
          0.0000000000000000,
233
74.6M
          -0.0643507165794627f,
234
74.6M
          0.3935103426921017f,
235
74.6M
          -0.0657870154914280f,
236
74.6M
          0.0000000000000000,
237
74.6M
          -0.4082482904638628f,
238
74.6M
          -0.3078822139579090f,
239
74.6M
          -0.3852501370925192f,
240
74.6M
          -0.0857401903551931f,
241
74.6M
          -0.4613374887461511f,
242
74.6M
          0.0000000000000000,
243
74.6M
          0.2191868483885747f,
244
74.6M
      },
245
74.6M
      {
246
74.6M
          0.2500000000000000,
247
74.6M
          -0.1014005039375376f,
248
74.6M
          -0.4067007583026072f,
249
74.6M
          -0.2125574805828705f,
250
74.6M
          0.0000000000000000,
251
74.6M
          -0.0643507165794627f,
252
74.6M
          -0.4517556589999464f,
253
74.6M
          0.3046847507248840f,
254
74.6M
          0.3017929516615503f,
255
74.6M
          -0.4082482904638635f,
256
74.6M
          -0.1747866975480813f,
257
74.6M
          0.2110560104933581f,
258
74.6M
          -0.1426608480880734f,
259
74.6M
          -0.1381354035075829f,
260
74.6M
          -0.1743760259965108f,
261
74.6M
          0.1135498731499426f,
262
74.6M
      },
263
74.6M
      {
264
74.6M
          0.2500000000000000,
265
74.6M
          -0.1014005039375377f,
266
74.6M
          -0.1957439937204287f,
267
74.6M
          -0.1621205195722833f,
268
74.6M
          0.0000000000000000,
269
74.6M
          -0.0643507165794628f,
270
74.6M
          0.0074182263792444f,
271
74.6M
          0.2904801297290076f,
272
74.6M
          0.0952002265347505f,
273
74.6M
          0.0000000000000000,
274
74.6M
          0.3675398009862011f,
275
74.6M
          -0.4921585901373891f,
276
74.6M
          0.2462710772207514f,
277
74.6M
          -0.0794670660591026f,
278
74.6M
          0.3623817333531165f,
279
74.6M
          -0.4351904965232251f,
280
74.6M
      },
281
74.6M
      {
282
74.6M
          0.2500000000000000,
283
74.6M
          -0.1014005039375375f,
284
74.6M
          0.0000000000000000,
285
74.6M
          -0.4706702258572528f,
286
74.6M
          0.0000000000000000,
287
74.6M
          -0.0643507165794627f,
288
74.6M
          0.1107416575309343f,
289
74.6M
          0.0000000000000000,
290
74.6M
          -0.1627234014286617f,
291
74.6M
          0.0000000000000000,
292
74.6M
          0.0000000000000000,
293
74.6M
          0.0000000000000000,
294
74.6M
          0.1488339922711357f,
295
74.6M
          0.4972464710953509f,
296
74.6M
          0.2921026642334879f,
297
74.6M
          0.5550443808910661f,
298
74.6M
      },
299
74.6M
      {
300
74.6M
          0.2500000000000000,
301
74.6M
          -0.1014005039375377f,
302
74.6M
          0.1137907446044809f,
303
74.6M
          -0.1464291867126764f,
304
74.6M
          0.0000000000000000,
305
74.6M
          -0.0643507165794628f,
306
74.6M
          0.0829816309488205f,
307
74.6M
          -0.2388977352334460f,
308
74.6M
          -0.3531238544981630f,
309
74.6M
          -0.4082482904638630f,
310
74.6M
          0.4826689115059883f,
311
74.6M
          0.1741941265991622f,
312
74.6M
          -0.0476868035022925f,
313
74.6M
          0.1253805944856366f,
314
74.6M
          -0.4326608024727445f,
315
74.6M
          -0.2546827712406646f,
316
74.6M
      },
317
74.6M
      {
318
74.6M
          0.2500000000000000,
319
74.6M
          -0.1014005039375377f,
320
74.6M
          -0.4444481661973438f,
321
74.6M
          0.3085497062849487f,
322
74.6M
          0.0000000000000000,
323
74.6M
          -0.0643507165794628f,
324
74.6M
          0.1585450355183970f,
325
74.6M
          -0.5112616136592012f,
326
74.6M
          0.2579236279634129f,
327
74.6M
          0.0000000000000000,
328
74.6M
          -0.0812611176717504f,
329
74.6M
          -0.1856718091610990f,
330
74.6M
          -0.3416446842253373f,
331
74.6M
          0.3302282550303805f,
332
74.6M
          0.0702790691196282f,
333
74.6M
          -0.0741750459581023f,
334
74.6M
      },
335
74.6M
      {
336
74.6M
          0.2500000000000000,
337
74.6M
          -0.1014005039375376f,
338
74.6M
          -0.2929100136981264f,
339
74.6M
          0.0000000000000000,
340
74.6M
          0.0000000000000000,
341
74.6M
          -0.0643507165794627f,
342
74.6M
          0.3935103426921022f,
343
74.6M
          0.0657870154914254f,
344
74.6M
          0.0000000000000000,
345
74.6M
          0.4082482904638634f,
346
74.6M
          0.3078822139579031f,
347
74.6M
          0.3852501370925211f,
348
74.6M
          -0.0857401903551927f,
349
74.6M
          -0.4613374887461554f,
350
74.6M
          0.0000000000000000,
351
74.6M
          0.2191868483885728f,
352
74.6M
      },
353
74.6M
      {
354
74.6M
          0.2500000000000000,
355
74.6M
          -0.1014005039375376f,
356
74.6M
          -0.1137907446044814f,
357
74.6M
          -0.1464291867126654f,
358
74.6M
          0.0000000000000000,
359
74.6M
          -0.0643507165794627f,
360
74.6M
          0.0829816309488214f,
361
74.6M
          0.2388977352334547f,
362
74.6M
          -0.3531238544981624f,
363
74.6M
          0.4082482904638630f,
364
74.6M
          -0.4826689115059858f,
365
74.6M
          -0.1741941265991621f,
366
74.6M
          -0.0476868035022928f,
367
74.6M
          0.1253805944856431f,
368
74.6M
          -0.4326608024727457f,
369
74.6M
          -0.2546827712406641f,
370
74.6M
      },
371
74.6M
      {
372
74.6M
          0.2500000000000000,
373
74.6M
          -0.1014005039375374f,
374
74.6M
          0.0000000000000000,
375
74.6M
          0.4251149611657548f,
376
74.6M
          0.0000000000000000,
377
74.6M
          -0.0643507165794626f,
378
74.6M
          -0.4517556589999480f,
379
74.6M
          0.0000000000000000,
380
74.6M
          -0.6035859033230976f,
381
74.6M
          0.0000000000000000,
382
74.6M
          0.0000000000000000,
383
74.6M
          0.0000000000000000,
384
74.6M
          -0.1426608480880724f,
385
74.6M
          -0.1381354035075845f,
386
74.6M
          0.3487520519930227f,
387
74.6M
          0.1135498731499429f,
388
74.6M
      },
389
74.6M
  };
390
391
74.6M
  const HWY_CAPPED(float, 16) d;
392
224M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
149M
    auto scalar = Zero(d);
394
2.53G
    for (size_t j = 0; j < 16; j++) {
395
2.38G
      auto px = Set(d, pixels[j]);
396
2.38G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.38G
      scalar = MulAdd(px, basis, scalar);
398
2.38G
    }
399
149M
    Store(scalar, d, coeffs + i);
400
149M
  }
401
74.6M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.42M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.42M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.42M
      {
102
1.42M
          0.2500000000000000,
103
1.42M
          0.8769029297991420f,
104
1.42M
          0.0000000000000000,
105
1.42M
          0.0000000000000000,
106
1.42M
          0.0000000000000000,
107
1.42M
          -0.4105377591765233f,
108
1.42M
          0.0000000000000000,
109
1.42M
          0.0000000000000000,
110
1.42M
          0.0000000000000000,
111
1.42M
          0.0000000000000000,
112
1.42M
          0.0000000000000000,
113
1.42M
          0.0000000000000000,
114
1.42M
          0.0000000000000000,
115
1.42M
          0.0000000000000000,
116
1.42M
          0.0000000000000000,
117
1.42M
          0.0000000000000000,
118
1.42M
      },
119
1.42M
      {
120
1.42M
          0.2500000000000000,
121
1.42M
          0.2206518106944235f,
122
1.42M
          0.0000000000000000,
123
1.42M
          0.0000000000000000,
124
1.42M
          -0.7071067811865474f,
125
1.42M
          0.6235485373547691f,
126
1.42M
          0.0000000000000000,
127
1.42M
          0.0000000000000000,
128
1.42M
          0.0000000000000000,
129
1.42M
          0.0000000000000000,
130
1.42M
          0.0000000000000000,
131
1.42M
          0.0000000000000000,
132
1.42M
          0.0000000000000000,
133
1.42M
          0.0000000000000000,
134
1.42M
          0.0000000000000000,
135
1.42M
          0.0000000000000000,
136
1.42M
      },
137
1.42M
      {
138
1.42M
          0.2500000000000000,
139
1.42M
          -0.1014005039375376f,
140
1.42M
          0.4067007583026075f,
141
1.42M
          -0.2125574805828875f,
142
1.42M
          0.0000000000000000,
143
1.42M
          -0.0643507165794627f,
144
1.42M
          -0.4517556589999482f,
145
1.42M
          -0.3046847507248690f,
146
1.42M
          0.3017929516615495f,
147
1.42M
          0.4082482904638627f,
148
1.42M
          0.1747866975480809f,
149
1.42M
          -0.2110560104933578f,
150
1.42M
          -0.1426608480880726f,
151
1.42M
          -0.1381354035075859f,
152
1.42M
          -0.1743760259965107f,
153
1.42M
          0.1135498731499434f,
154
1.42M
      },
155
1.42M
      {
156
1.42M
          0.2500000000000000,
157
1.42M
          -0.1014005039375375f,
158
1.42M
          0.4444481661973445f,
159
1.42M
          0.3085497062849767f,
160
1.42M
          0.0000000000000000f,
161
1.42M
          -0.0643507165794627f,
162
1.42M
          0.1585450355184006f,
163
1.42M
          0.5112616136591823f,
164
1.42M
          0.2579236279634118f,
165
1.42M
          0.0000000000000000,
166
1.42M
          0.0812611176717539f,
167
1.42M
          0.1856718091610980f,
168
1.42M
          -0.3416446842253372f,
169
1.42M
          0.3302282550303788f,
170
1.42M
          0.0702790691196284f,
171
1.42M
          -0.0741750459581035f,
172
1.42M
      },
173
1.42M
      {
174
1.42M
          0.2500000000000000,
175
1.42M
          0.2206518106944236f,
176
1.42M
          0.0000000000000000,
177
1.42M
          0.0000000000000000,
178
1.42M
          0.7071067811865476f,
179
1.42M
          0.6235485373547694f,
180
1.42M
          0.0000000000000000,
181
1.42M
          0.0000000000000000,
182
1.42M
          0.0000000000000000,
183
1.42M
          0.0000000000000000,
184
1.42M
          0.0000000000000000,
185
1.42M
          0.0000000000000000,
186
1.42M
          0.0000000000000000,
187
1.42M
          0.0000000000000000,
188
1.42M
          0.0000000000000000,
189
1.42M
          0.0000000000000000,
190
1.42M
      },
191
1.42M
      {
192
1.42M
          0.2500000000000000,
193
1.42M
          -0.1014005039375378f,
194
1.42M
          0.0000000000000000,
195
1.42M
          0.4706702258572536f,
196
1.42M
          0.0000000000000000,
197
1.42M
          -0.0643507165794628f,
198
1.42M
          -0.0403851516082220f,
199
1.42M
          0.0000000000000000,
200
1.42M
          0.1627234014286620f,
201
1.42M
          0.0000000000000000,
202
1.42M
          0.0000000000000000,
203
1.42M
          0.0000000000000000,
204
1.42M
          0.7367497537172237f,
205
1.42M
          0.0875511500058708f,
206
1.42M
          -0.2921026642334881f,
207
1.42M
          0.1940289303259434f,
208
1.42M
      },
209
1.42M
      {
210
1.42M
          0.2500000000000000,
211
1.42M
          -0.1014005039375377f,
212
1.42M
          0.1957439937204294f,
213
1.42M
          -0.1621205195722993f,
214
1.42M
          0.0000000000000000,
215
1.42M
          -0.0643507165794628f,
216
1.42M
          0.0074182263792424f,
217
1.42M
          -0.2904801297289980f,
218
1.42M
          0.0952002265347504f,
219
1.42M
          0.0000000000000000,
220
1.42M
          -0.3675398009862027f,
221
1.42M
          0.4921585901373873f,
222
1.42M
          0.2462710772207515f,
223
1.42M
          -0.0794670660590957f,
224
1.42M
          0.3623817333531167f,
225
1.42M
          -0.4351904965232280f,
226
1.42M
      },
227
1.42M
      {
228
1.42M
          0.2500000000000000,
229
1.42M
          -0.1014005039375376f,
230
1.42M
          0.2929100136981264f,
231
1.42M
          0.0000000000000000,
232
1.42M
          0.0000000000000000,
233
1.42M
          -0.0643507165794627f,
234
1.42M
          0.3935103426921017f,
235
1.42M
          -0.0657870154914280f,
236
1.42M
          0.0000000000000000,
237
1.42M
          -0.4082482904638628f,
238
1.42M
          -0.3078822139579090f,
239
1.42M
          -0.3852501370925192f,
240
1.42M
          -0.0857401903551931f,
241
1.42M
          -0.4613374887461511f,
242
1.42M
          0.0000000000000000,
243
1.42M
          0.2191868483885747f,
244
1.42M
      },
245
1.42M
      {
246
1.42M
          0.2500000000000000,
247
1.42M
          -0.1014005039375376f,
248
1.42M
          -0.4067007583026072f,
249
1.42M
          -0.2125574805828705f,
250
1.42M
          0.0000000000000000,
251
1.42M
          -0.0643507165794627f,
252
1.42M
          -0.4517556589999464f,
253
1.42M
          0.3046847507248840f,
254
1.42M
          0.3017929516615503f,
255
1.42M
          -0.4082482904638635f,
256
1.42M
          -0.1747866975480813f,
257
1.42M
          0.2110560104933581f,
258
1.42M
          -0.1426608480880734f,
259
1.42M
          -0.1381354035075829f,
260
1.42M
          -0.1743760259965108f,
261
1.42M
          0.1135498731499426f,
262
1.42M
      },
263
1.42M
      {
264
1.42M
          0.2500000000000000,
265
1.42M
          -0.1014005039375377f,
266
1.42M
          -0.1957439937204287f,
267
1.42M
          -0.1621205195722833f,
268
1.42M
          0.0000000000000000,
269
1.42M
          -0.0643507165794628f,
270
1.42M
          0.0074182263792444f,
271
1.42M
          0.2904801297290076f,
272
1.42M
          0.0952002265347505f,
273
1.42M
          0.0000000000000000,
274
1.42M
          0.3675398009862011f,
275
1.42M
          -0.4921585901373891f,
276
1.42M
          0.2462710772207514f,
277
1.42M
          -0.0794670660591026f,
278
1.42M
          0.3623817333531165f,
279
1.42M
          -0.4351904965232251f,
280
1.42M
      },
281
1.42M
      {
282
1.42M
          0.2500000000000000,
283
1.42M
          -0.1014005039375375f,
284
1.42M
          0.0000000000000000,
285
1.42M
          -0.4706702258572528f,
286
1.42M
          0.0000000000000000,
287
1.42M
          -0.0643507165794627f,
288
1.42M
          0.1107416575309343f,
289
1.42M
          0.0000000000000000,
290
1.42M
          -0.1627234014286617f,
291
1.42M
          0.0000000000000000,
292
1.42M
          0.0000000000000000,
293
1.42M
          0.0000000000000000,
294
1.42M
          0.1488339922711357f,
295
1.42M
          0.4972464710953509f,
296
1.42M
          0.2921026642334879f,
297
1.42M
          0.5550443808910661f,
298
1.42M
      },
299
1.42M
      {
300
1.42M
          0.2500000000000000,
301
1.42M
          -0.1014005039375377f,
302
1.42M
          0.1137907446044809f,
303
1.42M
          -0.1464291867126764f,
304
1.42M
          0.0000000000000000,
305
1.42M
          -0.0643507165794628f,
306
1.42M
          0.0829816309488205f,
307
1.42M
          -0.2388977352334460f,
308
1.42M
          -0.3531238544981630f,
309
1.42M
          -0.4082482904638630f,
310
1.42M
          0.4826689115059883f,
311
1.42M
          0.1741941265991622f,
312
1.42M
          -0.0476868035022925f,
313
1.42M
          0.1253805944856366f,
314
1.42M
          -0.4326608024727445f,
315
1.42M
          -0.2546827712406646f,
316
1.42M
      },
317
1.42M
      {
318
1.42M
          0.2500000000000000,
319
1.42M
          -0.1014005039375377f,
320
1.42M
          -0.4444481661973438f,
321
1.42M
          0.3085497062849487f,
322
1.42M
          0.0000000000000000,
323
1.42M
          -0.0643507165794628f,
324
1.42M
          0.1585450355183970f,
325
1.42M
          -0.5112616136592012f,
326
1.42M
          0.2579236279634129f,
327
1.42M
          0.0000000000000000,
328
1.42M
          -0.0812611176717504f,
329
1.42M
          -0.1856718091610990f,
330
1.42M
          -0.3416446842253373f,
331
1.42M
          0.3302282550303805f,
332
1.42M
          0.0702790691196282f,
333
1.42M
          -0.0741750459581023f,
334
1.42M
      },
335
1.42M
      {
336
1.42M
          0.2500000000000000,
337
1.42M
          -0.1014005039375376f,
338
1.42M
          -0.2929100136981264f,
339
1.42M
          0.0000000000000000,
340
1.42M
          0.0000000000000000,
341
1.42M
          -0.0643507165794627f,
342
1.42M
          0.3935103426921022f,
343
1.42M
          0.0657870154914254f,
344
1.42M
          0.0000000000000000,
345
1.42M
          0.4082482904638634f,
346
1.42M
          0.3078822139579031f,
347
1.42M
          0.3852501370925211f,
348
1.42M
          -0.0857401903551927f,
349
1.42M
          -0.4613374887461554f,
350
1.42M
          0.0000000000000000,
351
1.42M
          0.2191868483885728f,
352
1.42M
      },
353
1.42M
      {
354
1.42M
          0.2500000000000000,
355
1.42M
          -0.1014005039375376f,
356
1.42M
          -0.1137907446044814f,
357
1.42M
          -0.1464291867126654f,
358
1.42M
          0.0000000000000000,
359
1.42M
          -0.0643507165794627f,
360
1.42M
          0.0829816309488214f,
361
1.42M
          0.2388977352334547f,
362
1.42M
          -0.3531238544981624f,
363
1.42M
          0.4082482904638630f,
364
1.42M
          -0.4826689115059858f,
365
1.42M
          -0.1741941265991621f,
366
1.42M
          -0.0476868035022928f,
367
1.42M
          0.1253805944856431f,
368
1.42M
          -0.4326608024727457f,
369
1.42M
          -0.2546827712406641f,
370
1.42M
      },
371
1.42M
      {
372
1.42M
          0.2500000000000000,
373
1.42M
          -0.1014005039375374f,
374
1.42M
          0.0000000000000000,
375
1.42M
          0.4251149611657548f,
376
1.42M
          0.0000000000000000,
377
1.42M
          -0.0643507165794626f,
378
1.42M
          -0.4517556589999480f,
379
1.42M
          0.0000000000000000,
380
1.42M
          -0.6035859033230976f,
381
1.42M
          0.0000000000000000,
382
1.42M
          0.0000000000000000,
383
1.42M
          0.0000000000000000,
384
1.42M
          -0.1426608480880724f,
385
1.42M
          -0.1381354035075845f,
386
1.42M
          0.3487520519930227f,
387
1.42M
          0.1135498731499429f,
388
1.42M
      },
389
1.42M
  };
390
391
1.42M
  const HWY_CAPPED(float, 16) d;
392
4.26M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
2.84M
    auto scalar = Zero(d);
394
48.3M
    for (size_t j = 0; j < 16; j++) {
395
45.5M
      auto px = Set(d, pixels[j]);
396
45.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
45.5M
      scalar = MulAdd(px, basis, scalar);
398
45.5M
    }
399
2.84M
    Store(scalar, d, coeffs + i);
400
2.84M
  }
401
1.42M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
71.8M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
71.8M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
71.8M
      {
102
71.8M
          0.2500000000000000,
103
71.8M
          0.8769029297991420f,
104
71.8M
          0.0000000000000000,
105
71.8M
          0.0000000000000000,
106
71.8M
          0.0000000000000000,
107
71.8M
          -0.4105377591765233f,
108
71.8M
          0.0000000000000000,
109
71.8M
          0.0000000000000000,
110
71.8M
          0.0000000000000000,
111
71.8M
          0.0000000000000000,
112
71.8M
          0.0000000000000000,
113
71.8M
          0.0000000000000000,
114
71.8M
          0.0000000000000000,
115
71.8M
          0.0000000000000000,
116
71.8M
          0.0000000000000000,
117
71.8M
          0.0000000000000000,
118
71.8M
      },
119
71.8M
      {
120
71.8M
          0.2500000000000000,
121
71.8M
          0.2206518106944235f,
122
71.8M
          0.0000000000000000,
123
71.8M
          0.0000000000000000,
124
71.8M
          -0.7071067811865474f,
125
71.8M
          0.6235485373547691f,
126
71.8M
          0.0000000000000000,
127
71.8M
          0.0000000000000000,
128
71.8M
          0.0000000000000000,
129
71.8M
          0.0000000000000000,
130
71.8M
          0.0000000000000000,
131
71.8M
          0.0000000000000000,
132
71.8M
          0.0000000000000000,
133
71.8M
          0.0000000000000000,
134
71.8M
          0.0000000000000000,
135
71.8M
          0.0000000000000000,
136
71.8M
      },
137
71.8M
      {
138
71.8M
          0.2500000000000000,
139
71.8M
          -0.1014005039375376f,
140
71.8M
          0.4067007583026075f,
141
71.8M
          -0.2125574805828875f,
142
71.8M
          0.0000000000000000,
143
71.8M
          -0.0643507165794627f,
144
71.8M
          -0.4517556589999482f,
145
71.8M
          -0.3046847507248690f,
146
71.8M
          0.3017929516615495f,
147
71.8M
          0.4082482904638627f,
148
71.8M
          0.1747866975480809f,
149
71.8M
          -0.2110560104933578f,
150
71.8M
          -0.1426608480880726f,
151
71.8M
          -0.1381354035075859f,
152
71.8M
          -0.1743760259965107f,
153
71.8M
          0.1135498731499434f,
154
71.8M
      },
155
71.8M
      {
156
71.8M
          0.2500000000000000,
157
71.8M
          -0.1014005039375375f,
158
71.8M
          0.4444481661973445f,
159
71.8M
          0.3085497062849767f,
160
71.8M
          0.0000000000000000f,
161
71.8M
          -0.0643507165794627f,
162
71.8M
          0.1585450355184006f,
163
71.8M
          0.5112616136591823f,
164
71.8M
          0.2579236279634118f,
165
71.8M
          0.0000000000000000,
166
71.8M
          0.0812611176717539f,
167
71.8M
          0.1856718091610980f,
168
71.8M
          -0.3416446842253372f,
169
71.8M
          0.3302282550303788f,
170
71.8M
          0.0702790691196284f,
171
71.8M
          -0.0741750459581035f,
172
71.8M
      },
173
71.8M
      {
174
71.8M
          0.2500000000000000,
175
71.8M
          0.2206518106944236f,
176
71.8M
          0.0000000000000000,
177
71.8M
          0.0000000000000000,
178
71.8M
          0.7071067811865476f,
179
71.8M
          0.6235485373547694f,
180
71.8M
          0.0000000000000000,
181
71.8M
          0.0000000000000000,
182
71.8M
          0.0000000000000000,
183
71.8M
          0.0000000000000000,
184
71.8M
          0.0000000000000000,
185
71.8M
          0.0000000000000000,
186
71.8M
          0.0000000000000000,
187
71.8M
          0.0000000000000000,
188
71.8M
          0.0000000000000000,
189
71.8M
          0.0000000000000000,
190
71.8M
      },
191
71.8M
      {
192
71.8M
          0.2500000000000000,
193
71.8M
          -0.1014005039375378f,
194
71.8M
          0.0000000000000000,
195
71.8M
          0.4706702258572536f,
196
71.8M
          0.0000000000000000,
197
71.8M
          -0.0643507165794628f,
198
71.8M
          -0.0403851516082220f,
199
71.8M
          0.0000000000000000,
200
71.8M
          0.1627234014286620f,
201
71.8M
          0.0000000000000000,
202
71.8M
          0.0000000000000000,
203
71.8M
          0.0000000000000000,
204
71.8M
          0.7367497537172237f,
205
71.8M
          0.0875511500058708f,
206
71.8M
          -0.2921026642334881f,
207
71.8M
          0.1940289303259434f,
208
71.8M
      },
209
71.8M
      {
210
71.8M
          0.2500000000000000,
211
71.8M
          -0.1014005039375377f,
212
71.8M
          0.1957439937204294f,
213
71.8M
          -0.1621205195722993f,
214
71.8M
          0.0000000000000000,
215
71.8M
          -0.0643507165794628f,
216
71.8M
          0.0074182263792424f,
217
71.8M
          -0.2904801297289980f,
218
71.8M
          0.0952002265347504f,
219
71.8M
          0.0000000000000000,
220
71.8M
          -0.3675398009862027f,
221
71.8M
          0.4921585901373873f,
222
71.8M
          0.2462710772207515f,
223
71.8M
          -0.0794670660590957f,
224
71.8M
          0.3623817333531167f,
225
71.8M
          -0.4351904965232280f,
226
71.8M
      },
227
71.8M
      {
228
71.8M
          0.2500000000000000,
229
71.8M
          -0.1014005039375376f,
230
71.8M
          0.2929100136981264f,
231
71.8M
          0.0000000000000000,
232
71.8M
          0.0000000000000000,
233
71.8M
          -0.0643507165794627f,
234
71.8M
          0.3935103426921017f,
235
71.8M
          -0.0657870154914280f,
236
71.8M
          0.0000000000000000,
237
71.8M
          -0.4082482904638628f,
238
71.8M
          -0.3078822139579090f,
239
71.8M
          -0.3852501370925192f,
240
71.8M
          -0.0857401903551931f,
241
71.8M
          -0.4613374887461511f,
242
71.8M
          0.0000000000000000,
243
71.8M
          0.2191868483885747f,
244
71.8M
      },
245
71.8M
      {
246
71.8M
          0.2500000000000000,
247
71.8M
          -0.1014005039375376f,
248
71.8M
          -0.4067007583026072f,
249
71.8M
          -0.2125574805828705f,
250
71.8M
          0.0000000000000000,
251
71.8M
          -0.0643507165794627f,
252
71.8M
          -0.4517556589999464f,
253
71.8M
          0.3046847507248840f,
254
71.8M
          0.3017929516615503f,
255
71.8M
          -0.4082482904638635f,
256
71.8M
          -0.1747866975480813f,
257
71.8M
          0.2110560104933581f,
258
71.8M
          -0.1426608480880734f,
259
71.8M
          -0.1381354035075829f,
260
71.8M
          -0.1743760259965108f,
261
71.8M
          0.1135498731499426f,
262
71.8M
      },
263
71.8M
      {
264
71.8M
          0.2500000000000000,
265
71.8M
          -0.1014005039375377f,
266
71.8M
          -0.1957439937204287f,
267
71.8M
          -0.1621205195722833f,
268
71.8M
          0.0000000000000000,
269
71.8M
          -0.0643507165794628f,
270
71.8M
          0.0074182263792444f,
271
71.8M
          0.2904801297290076f,
272
71.8M
          0.0952002265347505f,
273
71.8M
          0.0000000000000000,
274
71.8M
          0.3675398009862011f,
275
71.8M
          -0.4921585901373891f,
276
71.8M
          0.2462710772207514f,
277
71.8M
          -0.0794670660591026f,
278
71.8M
          0.3623817333531165f,
279
71.8M
          -0.4351904965232251f,
280
71.8M
      },
281
71.8M
      {
282
71.8M
          0.2500000000000000,
283
71.8M
          -0.1014005039375375f,
284
71.8M
          0.0000000000000000,
285
71.8M
          -0.4706702258572528f,
286
71.8M
          0.0000000000000000,
287
71.8M
          -0.0643507165794627f,
288
71.8M
          0.1107416575309343f,
289
71.8M
          0.0000000000000000,
290
71.8M
          -0.1627234014286617f,
291
71.8M
          0.0000000000000000,
292
71.8M
          0.0000000000000000,
293
71.8M
          0.0000000000000000,
294
71.8M
          0.1488339922711357f,
295
71.8M
          0.4972464710953509f,
296
71.8M
          0.2921026642334879f,
297
71.8M
          0.5550443808910661f,
298
71.8M
      },
299
71.8M
      {
300
71.8M
          0.2500000000000000,
301
71.8M
          -0.1014005039375377f,
302
71.8M
          0.1137907446044809f,
303
71.8M
          -0.1464291867126764f,
304
71.8M
          0.0000000000000000,
305
71.8M
          -0.0643507165794628f,
306
71.8M
          0.0829816309488205f,
307
71.8M
          -0.2388977352334460f,
308
71.8M
          -0.3531238544981630f,
309
71.8M
          -0.4082482904638630f,
310
71.8M
          0.4826689115059883f,
311
71.8M
          0.1741941265991622f,
312
71.8M
          -0.0476868035022925f,
313
71.8M
          0.1253805944856366f,
314
71.8M
          -0.4326608024727445f,
315
71.8M
          -0.2546827712406646f,
316
71.8M
      },
317
71.8M
      {
318
71.8M
          0.2500000000000000,
319
71.8M
          -0.1014005039375377f,
320
71.8M
          -0.4444481661973438f,
321
71.8M
          0.3085497062849487f,
322
71.8M
          0.0000000000000000,
323
71.8M
          -0.0643507165794628f,
324
71.8M
          0.1585450355183970f,
325
71.8M
          -0.5112616136592012f,
326
71.8M
          0.2579236279634129f,
327
71.8M
          0.0000000000000000,
328
71.8M
          -0.0812611176717504f,
329
71.8M
          -0.1856718091610990f,
330
71.8M
          -0.3416446842253373f,
331
71.8M
          0.3302282550303805f,
332
71.8M
          0.0702790691196282f,
333
71.8M
          -0.0741750459581023f,
334
71.8M
      },
335
71.8M
      {
336
71.8M
          0.2500000000000000,
337
71.8M
          -0.1014005039375376f,
338
71.8M
          -0.2929100136981264f,
339
71.8M
          0.0000000000000000,
340
71.8M
          0.0000000000000000,
341
71.8M
          -0.0643507165794627f,
342
71.8M
          0.3935103426921022f,
343
71.8M
          0.0657870154914254f,
344
71.8M
          0.0000000000000000,
345
71.8M
          0.4082482904638634f,
346
71.8M
          0.3078822139579031f,
347
71.8M
          0.3852501370925211f,
348
71.8M
          -0.0857401903551927f,
349
71.8M
          -0.4613374887461554f,
350
71.8M
          0.0000000000000000,
351
71.8M
          0.2191868483885728f,
352
71.8M
      },
353
71.8M
      {
354
71.8M
          0.2500000000000000,
355
71.8M
          -0.1014005039375376f,
356
71.8M
          -0.1137907446044814f,
357
71.8M
          -0.1464291867126654f,
358
71.8M
          0.0000000000000000,
359
71.8M
          -0.0643507165794627f,
360
71.8M
          0.0829816309488214f,
361
71.8M
          0.2388977352334547f,
362
71.8M
          -0.3531238544981624f,
363
71.8M
          0.4082482904638630f,
364
71.8M
          -0.4826689115059858f,
365
71.8M
          -0.1741941265991621f,
366
71.8M
          -0.0476868035022928f,
367
71.8M
          0.1253805944856431f,
368
71.8M
          -0.4326608024727457f,
369
71.8M
          -0.2546827712406641f,
370
71.8M
      },
371
71.8M
      {
372
71.8M
          0.2500000000000000,
373
71.8M
          -0.1014005039375374f,
374
71.8M
          0.0000000000000000,
375
71.8M
          0.4251149611657548f,
376
71.8M
          0.0000000000000000,
377
71.8M
          -0.0643507165794626f,
378
71.8M
          -0.4517556589999480f,
379
71.8M
          0.0000000000000000,
380
71.8M
          -0.6035859033230976f,
381
71.8M
          0.0000000000000000,
382
71.8M
          0.0000000000000000,
383
71.8M
          0.0000000000000000,
384
71.8M
          -0.1426608480880724f,
385
71.8M
          -0.1381354035075845f,
386
71.8M
          0.3487520519930227f,
387
71.8M
          0.1135498731499429f,
388
71.8M
      },
389
71.8M
  };
390
391
71.8M
  const HWY_CAPPED(float, 16) d;
392
215M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
143M
    auto scalar = Zero(d);
394
2.44G
    for (size_t j = 0; j < 16; j++) {
395
2.29G
      auto px = Set(d, pixels[j]);
396
2.29G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.29G
      scalar = MulAdd(px, basis, scalar);
398
2.29G
    }
399
143M
    Store(scalar, d, coeffs + i);
400
143M
  }
401
71.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.42M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.42M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.42M
      {
102
1.42M
          0.2500000000000000,
103
1.42M
          0.8769029297991420f,
104
1.42M
          0.0000000000000000,
105
1.42M
          0.0000000000000000,
106
1.42M
          0.0000000000000000,
107
1.42M
          -0.4105377591765233f,
108
1.42M
          0.0000000000000000,
109
1.42M
          0.0000000000000000,
110
1.42M
          0.0000000000000000,
111
1.42M
          0.0000000000000000,
112
1.42M
          0.0000000000000000,
113
1.42M
          0.0000000000000000,
114
1.42M
          0.0000000000000000,
115
1.42M
          0.0000000000000000,
116
1.42M
          0.0000000000000000,
117
1.42M
          0.0000000000000000,
118
1.42M
      },
119
1.42M
      {
120
1.42M
          0.2500000000000000,
121
1.42M
          0.2206518106944235f,
122
1.42M
          0.0000000000000000,
123
1.42M
          0.0000000000000000,
124
1.42M
          -0.7071067811865474f,
125
1.42M
          0.6235485373547691f,
126
1.42M
          0.0000000000000000,
127
1.42M
          0.0000000000000000,
128
1.42M
          0.0000000000000000,
129
1.42M
          0.0000000000000000,
130
1.42M
          0.0000000000000000,
131
1.42M
          0.0000000000000000,
132
1.42M
          0.0000000000000000,
133
1.42M
          0.0000000000000000,
134
1.42M
          0.0000000000000000,
135
1.42M
          0.0000000000000000,
136
1.42M
      },
137
1.42M
      {
138
1.42M
          0.2500000000000000,
139
1.42M
          -0.1014005039375376f,
140
1.42M
          0.4067007583026075f,
141
1.42M
          -0.2125574805828875f,
142
1.42M
          0.0000000000000000,
143
1.42M
          -0.0643507165794627f,
144
1.42M
          -0.4517556589999482f,
145
1.42M
          -0.3046847507248690f,
146
1.42M
          0.3017929516615495f,
147
1.42M
          0.4082482904638627f,
148
1.42M
          0.1747866975480809f,
149
1.42M
          -0.2110560104933578f,
150
1.42M
          -0.1426608480880726f,
151
1.42M
          -0.1381354035075859f,
152
1.42M
          -0.1743760259965107f,
153
1.42M
          0.1135498731499434f,
154
1.42M
      },
155
1.42M
      {
156
1.42M
          0.2500000000000000,
157
1.42M
          -0.1014005039375375f,
158
1.42M
          0.4444481661973445f,
159
1.42M
          0.3085497062849767f,
160
1.42M
          0.0000000000000000f,
161
1.42M
          -0.0643507165794627f,
162
1.42M
          0.1585450355184006f,
163
1.42M
          0.5112616136591823f,
164
1.42M
          0.2579236279634118f,
165
1.42M
          0.0000000000000000,
166
1.42M
          0.0812611176717539f,
167
1.42M
          0.1856718091610980f,
168
1.42M
          -0.3416446842253372f,
169
1.42M
          0.3302282550303788f,
170
1.42M
          0.0702790691196284f,
171
1.42M
          -0.0741750459581035f,
172
1.42M
      },
173
1.42M
      {
174
1.42M
          0.2500000000000000,
175
1.42M
          0.2206518106944236f,
176
1.42M
          0.0000000000000000,
177
1.42M
          0.0000000000000000,
178
1.42M
          0.7071067811865476f,
179
1.42M
          0.6235485373547694f,
180
1.42M
          0.0000000000000000,
181
1.42M
          0.0000000000000000,
182
1.42M
          0.0000000000000000,
183
1.42M
          0.0000000000000000,
184
1.42M
          0.0000000000000000,
185
1.42M
          0.0000000000000000,
186
1.42M
          0.0000000000000000,
187
1.42M
          0.0000000000000000,
188
1.42M
          0.0000000000000000,
189
1.42M
          0.0000000000000000,
190
1.42M
      },
191
1.42M
      {
192
1.42M
          0.2500000000000000,
193
1.42M
          -0.1014005039375378f,
194
1.42M
          0.0000000000000000,
195
1.42M
          0.4706702258572536f,
196
1.42M
          0.0000000000000000,
197
1.42M
          -0.0643507165794628f,
198
1.42M
          -0.0403851516082220f,
199
1.42M
          0.0000000000000000,
200
1.42M
          0.1627234014286620f,
201
1.42M
          0.0000000000000000,
202
1.42M
          0.0000000000000000,
203
1.42M
          0.0000000000000000,
204
1.42M
          0.7367497537172237f,
205
1.42M
          0.0875511500058708f,
206
1.42M
          -0.2921026642334881f,
207
1.42M
          0.1940289303259434f,
208
1.42M
      },
209
1.42M
      {
210
1.42M
          0.2500000000000000,
211
1.42M
          -0.1014005039375377f,
212
1.42M
          0.1957439937204294f,
213
1.42M
          -0.1621205195722993f,
214
1.42M
          0.0000000000000000,
215
1.42M
          -0.0643507165794628f,
216
1.42M
          0.0074182263792424f,
217
1.42M
          -0.2904801297289980f,
218
1.42M
          0.0952002265347504f,
219
1.42M
          0.0000000000000000,
220
1.42M
          -0.3675398009862027f,
221
1.42M
          0.4921585901373873f,
222
1.42M
          0.2462710772207515f,
223
1.42M
          -0.0794670660590957f,
224
1.42M
          0.3623817333531167f,
225
1.42M
          -0.4351904965232280f,
226
1.42M
      },
227
1.42M
      {
228
1.42M
          0.2500000000000000,
229
1.42M
          -0.1014005039375376f,
230
1.42M
          0.2929100136981264f,
231
1.42M
          0.0000000000000000,
232
1.42M
          0.0000000000000000,
233
1.42M
          -0.0643507165794627f,
234
1.42M
          0.3935103426921017f,
235
1.42M
          -0.0657870154914280f,
236
1.42M
          0.0000000000000000,
237
1.42M
          -0.4082482904638628f,
238
1.42M
          -0.3078822139579090f,
239
1.42M
          -0.3852501370925192f,
240
1.42M
          -0.0857401903551931f,
241
1.42M
          -0.4613374887461511f,
242
1.42M
          0.0000000000000000,
243
1.42M
          0.2191868483885747f,
244
1.42M
      },
245
1.42M
      {
246
1.42M
          0.2500000000000000,
247
1.42M
          -0.1014005039375376f,
248
1.42M
          -0.4067007583026072f,
249
1.42M
          -0.2125574805828705f,
250
1.42M
          0.0000000000000000,
251
1.42M
          -0.0643507165794627f,
252
1.42M
          -0.4517556589999464f,
253
1.42M
          0.3046847507248840f,
254
1.42M
          0.3017929516615503f,
255
1.42M
          -0.4082482904638635f,
256
1.42M
          -0.1747866975480813f,
257
1.42M
          0.2110560104933581f,
258
1.42M
          -0.1426608480880734f,
259
1.42M
          -0.1381354035075829f,
260
1.42M
          -0.1743760259965108f,
261
1.42M
          0.1135498731499426f,
262
1.42M
      },
263
1.42M
      {
264
1.42M
          0.2500000000000000,
265
1.42M
          -0.1014005039375377f,
266
1.42M
          -0.1957439937204287f,
267
1.42M
          -0.1621205195722833f,
268
1.42M
          0.0000000000000000,
269
1.42M
          -0.0643507165794628f,
270
1.42M
          0.0074182263792444f,
271
1.42M
          0.2904801297290076f,
272
1.42M
          0.0952002265347505f,
273
1.42M
          0.0000000000000000,
274
1.42M
          0.3675398009862011f,
275
1.42M
          -0.4921585901373891f,
276
1.42M
          0.2462710772207514f,
277
1.42M
          -0.0794670660591026f,
278
1.42M
          0.3623817333531165f,
279
1.42M
          -0.4351904965232251f,
280
1.42M
      },
281
1.42M
      {
282
1.42M
          0.2500000000000000,
283
1.42M
          -0.1014005039375375f,
284
1.42M
          0.0000000000000000,
285
1.42M
          -0.4706702258572528f,
286
1.42M
          0.0000000000000000,
287
1.42M
          -0.0643507165794627f,
288
1.42M
          0.1107416575309343f,
289
1.42M
          0.0000000000000000,
290
1.42M
          -0.1627234014286617f,
291
1.42M
          0.0000000000000000,
292
1.42M
          0.0000000000000000,
293
1.42M
          0.0000000000000000,
294
1.42M
          0.1488339922711357f,
295
1.42M
          0.4972464710953509f,
296
1.42M
          0.2921026642334879f,
297
1.42M
          0.5550443808910661f,
298
1.42M
      },
299
1.42M
      {
300
1.42M
          0.2500000000000000,
301
1.42M
          -0.1014005039375377f,
302
1.42M
          0.1137907446044809f,
303
1.42M
          -0.1464291867126764f,
304
1.42M
          0.0000000000000000,
305
1.42M
          -0.0643507165794628f,
306
1.42M
          0.0829816309488205f,
307
1.42M
          -0.2388977352334460f,
308
1.42M
          -0.3531238544981630f,
309
1.42M
          -0.4082482904638630f,
310
1.42M
          0.4826689115059883f,
311
1.42M
          0.1741941265991622f,
312
1.42M
          -0.0476868035022925f,
313
1.42M
          0.1253805944856366f,
314
1.42M
          -0.4326608024727445f,
315
1.42M
          -0.2546827712406646f,
316
1.42M
      },
317
1.42M
      {
318
1.42M
          0.2500000000000000,
319
1.42M
          -0.1014005039375377f,
320
1.42M
          -0.4444481661973438f,
321
1.42M
          0.3085497062849487f,
322
1.42M
          0.0000000000000000,
323
1.42M
          -0.0643507165794628f,
324
1.42M
          0.1585450355183970f,
325
1.42M
          -0.5112616136592012f,
326
1.42M
          0.2579236279634129f,
327
1.42M
          0.0000000000000000,
328
1.42M
          -0.0812611176717504f,
329
1.42M
          -0.1856718091610990f,
330
1.42M
          -0.3416446842253373f,
331
1.42M
          0.3302282550303805f,
332
1.42M
          0.0702790691196282f,
333
1.42M
          -0.0741750459581023f,
334
1.42M
      },
335
1.42M
      {
336
1.42M
          0.2500000000000000,
337
1.42M
          -0.1014005039375376f,
338
1.42M
          -0.2929100136981264f,
339
1.42M
          0.0000000000000000,
340
1.42M
          0.0000000000000000,
341
1.42M
          -0.0643507165794627f,
342
1.42M
          0.3935103426921022f,
343
1.42M
          0.0657870154914254f,
344
1.42M
          0.0000000000000000,
345
1.42M
          0.4082482904638634f,
346
1.42M
          0.3078822139579031f,
347
1.42M
          0.3852501370925211f,
348
1.42M
          -0.0857401903551927f,
349
1.42M
          -0.4613374887461554f,
350
1.42M
          0.0000000000000000,
351
1.42M
          0.2191868483885728f,
352
1.42M
      },
353
1.42M
      {
354
1.42M
          0.2500000000000000,
355
1.42M
          -0.1014005039375376f,
356
1.42M
          -0.1137907446044814f,
357
1.42M
          -0.1464291867126654f,
358
1.42M
          0.0000000000000000,
359
1.42M
          -0.0643507165794627f,
360
1.42M
          0.0829816309488214f,
361
1.42M
          0.2388977352334547f,
362
1.42M
          -0.3531238544981624f,
363
1.42M
          0.4082482904638630f,
364
1.42M
          -0.4826689115059858f,
365
1.42M
          -0.1741941265991621f,
366
1.42M
          -0.0476868035022928f,
367
1.42M
          0.1253805944856431f,
368
1.42M
          -0.4326608024727457f,
369
1.42M
          -0.2546827712406641f,
370
1.42M
      },
371
1.42M
      {
372
1.42M
          0.2500000000000000,
373
1.42M
          -0.1014005039375374f,
374
1.42M
          0.0000000000000000,
375
1.42M
          0.4251149611657548f,
376
1.42M
          0.0000000000000000,
377
1.42M
          -0.0643507165794626f,
378
1.42M
          -0.4517556589999480f,
379
1.42M
          0.0000000000000000,
380
1.42M
          -0.6035859033230976f,
381
1.42M
          0.0000000000000000,
382
1.42M
          0.0000000000000000,
383
1.42M
          0.0000000000000000,
384
1.42M
          -0.1426608480880724f,
385
1.42M
          -0.1381354035075845f,
386
1.42M
          0.3487520519930227f,
387
1.42M
          0.1135498731499429f,
388
1.42M
      },
389
1.42M
  };
390
391
1.42M
  const HWY_CAPPED(float, 16) d;
392
4.26M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
2.84M
    auto scalar = Zero(d);
394
48.3M
    for (size_t j = 0; j < 16; j++) {
395
45.5M
      auto px = Set(d, pixels[j]);
396
45.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
45.5M
      scalar = MulAdd(px, basis, scalar);
398
45.5M
    }
399
2.84M
    Store(scalar, d, coeffs + i);
400
2.84M
  }
401
1.42M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
74.6M
                            float* JXL_RESTRICT coefficients) {
411
74.6M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
74.6M
  size_t afv_x = afv_kind & 1;
413
74.6M
  size_t afv_y = afv_kind / 2;
414
74.6M
  HWY_ALIGN float block[4 * 8] = {};
415
373M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.49G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.19G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.19G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.19G
    }
420
298M
  }
421
  // AFV coefficients in (even, even) positions.
422
74.6M
  HWY_ALIGN float coeff[4 * 4];
423
74.6M
  AFVDCT4x4(block, coeff);
424
373M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.49G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.19G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.19G
    }
428
298M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
74.6M
  ComputeScaledDCT<4, 4>()(
431
74.6M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
74.6M
              pixels_stride),
433
74.6M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
373M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.68G
    for (size_t ix = 0; ix < 8; ix++) {
437
2.38G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.38G
    }
439
298M
  }
440
  // 4x8 DCT of the other half of the block.
441
74.6M
  ComputeScaledDCT<4, 8>()(
442
74.6M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
74.6M
      block, scratch_space);
444
373M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.68G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.38G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.38G
    }
448
298M
  }
449
74.6M
  float block00 = coefficients[0] * 0.25f;
450
74.6M
  float block01 = coefficients[1];
451
74.6M
  float block10 = coefficients[8];
452
74.6M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
74.6M
  coefficients[1] = (block00 - block01) * 0.5f;
454
74.6M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
74.6M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
296k
                            float* JXL_RESTRICT coefficients) {
411
296k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
296k
  size_t afv_x = afv_kind & 1;
413
296k
  size_t afv_y = afv_kind / 2;
414
296k
  HWY_ALIGN float block[4 * 8] = {};
415
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.73M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.73M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.73M
    }
420
1.18M
  }
421
  // AFV coefficients in (even, even) positions.
422
296k
  HWY_ALIGN float coeff[4 * 4];
423
296k
  AFVDCT4x4(block, coeff);
424
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.73M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.73M
    }
428
1.18M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
296k
  ComputeScaledDCT<4, 4>()(
431
296k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
296k
              pixels_stride),
433
296k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.47M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.47M
    }
439
1.18M
  }
440
  // 4x8 DCT of the other half of the block.
441
296k
  ComputeScaledDCT<4, 8>()(
442
296k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
296k
      block, scratch_space);
444
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.47M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.47M
    }
448
1.18M
  }
449
296k
  float block00 = coefficients[0] * 0.25f;
450
296k
  float block01 = coefficients[1];
451
296k
  float block10 = coefficients[8];
452
296k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
296k
  coefficients[1] = (block00 - block01) * 0.5f;
454
296k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
296k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
522k
                            float* JXL_RESTRICT coefficients) {
411
522k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
522k
  size_t afv_x = afv_kind & 1;
413
522k
  size_t afv_y = afv_kind / 2;
414
522k
  HWY_ALIGN float block[4 * 8] = {};
415
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
416
10.4M
    for (size_t ix = 0; ix < 4; ix++) {
417
8.36M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
8.36M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
8.36M
    }
420
2.09M
  }
421
  // AFV coefficients in (even, even) positions.
422
522k
  HWY_ALIGN float coeff[4 * 4];
423
522k
  AFVDCT4x4(block, coeff);
424
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
425
10.4M
    for (size_t ix = 0; ix < 4; ix++) {
426
8.36M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
8.36M
    }
428
2.09M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
522k
  ComputeScaledDCT<4, 4>()(
431
522k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
522k
              pixels_stride),
433
522k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
436
18.8M
    for (size_t ix = 0; ix < 8; ix++) {
437
16.7M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
16.7M
    }
439
2.09M
  }
440
  // 4x8 DCT of the other half of the block.
441
522k
  ComputeScaledDCT<4, 8>()(
442
522k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
522k
      block, scratch_space);
444
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
445
18.8M
    for (size_t ix = 0; ix < 8; ix++) {
446
16.7M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
16.7M
    }
448
2.09M
  }
449
522k
  float block00 = coefficients[0] * 0.25f;
450
522k
  float block01 = coefficients[1];
451
522k
  float block10 = coefficients[8];
452
522k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
522k
  coefficients[1] = (block00 - block01) * 0.5f;
454
522k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
522k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
261k
                            float* JXL_RESTRICT coefficients) {
411
261k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
261k
  size_t afv_x = afv_kind & 1;
413
261k
  size_t afv_y = afv_kind / 2;
414
261k
  HWY_ALIGN float block[4 * 8] = {};
415
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.23M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.18M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.18M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.18M
    }
420
1.04M
  }
421
  // AFV coefficients in (even, even) positions.
422
261k
  HWY_ALIGN float coeff[4 * 4];
423
261k
  AFVDCT4x4(block, coeff);
424
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.23M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.18M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.18M
    }
428
1.04M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
261k
  ComputeScaledDCT<4, 4>()(
431
261k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
261k
              pixels_stride),
433
261k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.41M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.36M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.36M
    }
439
1.04M
  }
440
  // 4x8 DCT of the other half of the block.
441
261k
  ComputeScaledDCT<4, 8>()(
442
261k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
261k
      block, scratch_space);
444
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.41M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.36M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.36M
    }
448
1.04M
  }
449
261k
  float block00 = coefficients[0] * 0.25f;
450
261k
  float block01 = coefficients[1];
451
261k
  float block10 = coefficients[8];
452
261k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
261k
  coefficients[1] = (block00 - block01) * 0.5f;
454
261k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
261k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
341k
                            float* JXL_RESTRICT coefficients) {
411
341k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
341k
  size_t afv_x = afv_kind & 1;
413
341k
  size_t afv_y = afv_kind / 2;
414
341k
  HWY_ALIGN float block[4 * 8] = {};
415
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.83M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.46M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.46M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.46M
    }
420
1.36M
  }
421
  // AFV coefficients in (even, even) positions.
422
341k
  HWY_ALIGN float coeff[4 * 4];
423
341k
  AFVDCT4x4(block, coeff);
424
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.83M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.46M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.46M
    }
428
1.36M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
341k
  ComputeScaledDCT<4, 4>()(
431
341k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
341k
              pixels_stride),
433
341k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
436
12.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
10.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
10.9M
    }
439
1.36M
  }
440
  // 4x8 DCT of the other half of the block.
441
341k
  ComputeScaledDCT<4, 8>()(
442
341k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
341k
      block, scratch_space);
444
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
445
12.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
10.9M
    }
448
1.36M
  }
449
341k
  float block00 = coefficients[0] * 0.25f;
450
341k
  float block01 = coefficients[1];
451
341k
  float block10 = coefficients[8];
452
341k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
341k
  coefficients[1] = (block00 - block01) * 0.5f;
454
341k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
341k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.9M
                            float* JXL_RESTRICT coefficients) {
411
17.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.9M
  size_t afv_x = afv_kind & 1;
413
17.9M
  size_t afv_y = afv_kind / 2;
414
17.9M
  HWY_ALIGN float block[4 * 8] = {};
415
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
359M
    for (size_t ix = 0; ix < 4; ix++) {
417
287M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
287M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
287M
    }
420
71.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.9M
  HWY_ALIGN float coeff[4 * 4];
423
17.9M
  AFVDCT4x4(block, coeff);
424
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
359M
    for (size_t ix = 0; ix < 4; ix++) {
426
287M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
287M
    }
428
71.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.9M
  ComputeScaledDCT<4, 4>()(
431
17.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.9M
              pixels_stride),
433
17.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
646M
    for (size_t ix = 0; ix < 8; ix++) {
437
574M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
574M
    }
439
71.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.9M
  ComputeScaledDCT<4, 8>()(
442
17.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.9M
      block, scratch_space);
444
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
646M
    for (size_t ix = 0; ix < 8; ix++) {
446
574M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
574M
    }
448
71.8M
  }
449
17.9M
  float block00 = coefficients[0] * 0.25f;
450
17.9M
  float block01 = coefficients[1];
451
17.9M
  float block10 = coefficients[8];
452
17.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.9M
                            float* JXL_RESTRICT coefficients) {
411
17.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.9M
  size_t afv_x = afv_kind & 1;
413
17.9M
  size_t afv_y = afv_kind / 2;
414
17.9M
  HWY_ALIGN float block[4 * 8] = {};
415
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
359M
    for (size_t ix = 0; ix < 4; ix++) {
417
287M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
287M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
287M
    }
420
71.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.9M
  HWY_ALIGN float coeff[4 * 4];
423
17.9M
  AFVDCT4x4(block, coeff);
424
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
359M
    for (size_t ix = 0; ix < 4; ix++) {
426
287M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
287M
    }
428
71.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.9M
  ComputeScaledDCT<4, 4>()(
431
17.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.9M
              pixels_stride),
433
17.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
646M
    for (size_t ix = 0; ix < 8; ix++) {
437
574M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
574M
    }
439
71.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.9M
  ComputeScaledDCT<4, 8>()(
442
17.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.9M
      block, scratch_space);
444
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
646M
    for (size_t ix = 0; ix < 8; ix++) {
446
574M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
574M
    }
448
71.8M
  }
449
17.9M
  float block00 = coefficients[0] * 0.25f;
450
17.9M
  float block01 = coefficients[1];
451
17.9M
  float block10 = coefficients[8];
452
17.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.9M
                            float* JXL_RESTRICT coefficients) {
411
17.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.9M
  size_t afv_x = afv_kind & 1;
413
17.9M
  size_t afv_y = afv_kind / 2;
414
17.9M
  HWY_ALIGN float block[4 * 8] = {};
415
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
359M
    for (size_t ix = 0; ix < 4; ix++) {
417
287M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
287M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
287M
    }
420
71.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.9M
  HWY_ALIGN float coeff[4 * 4];
423
17.9M
  AFVDCT4x4(block, coeff);
424
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
359M
    for (size_t ix = 0; ix < 4; ix++) {
426
287M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
287M
    }
428
71.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.9M
  ComputeScaledDCT<4, 4>()(
431
17.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.9M
              pixels_stride),
433
17.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
646M
    for (size_t ix = 0; ix < 8; ix++) {
437
574M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
574M
    }
439
71.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.9M
  ComputeScaledDCT<4, 8>()(
442
17.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.9M
      block, scratch_space);
444
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
646M
    for (size_t ix = 0; ix < 8; ix++) {
446
574M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
574M
    }
448
71.8M
  }
449
17.9M
  float block00 = coefficients[0] * 0.25f;
450
17.9M
  float block01 = coefficients[1];
451
17.9M
  float block10 = coefficients[8];
452
17.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.9M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.9M
                            float* JXL_RESTRICT coefficients) {
411
17.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.9M
  size_t afv_x = afv_kind & 1;
413
17.9M
  size_t afv_y = afv_kind / 2;
414
17.9M
  HWY_ALIGN float block[4 * 8] = {};
415
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
416
359M
    for (size_t ix = 0; ix < 4; ix++) {
417
287M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
287M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
287M
    }
420
71.8M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.9M
  HWY_ALIGN float coeff[4 * 4];
423
17.9M
  AFVDCT4x4(block, coeff);
424
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
425
359M
    for (size_t ix = 0; ix < 4; ix++) {
426
287M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
287M
    }
428
71.8M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.9M
  ComputeScaledDCT<4, 4>()(
431
17.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.9M
              pixels_stride),
433
17.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
436
646M
    for (size_t ix = 0; ix < 8; ix++) {
437
574M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
574M
    }
439
71.8M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.9M
  ComputeScaledDCT<4, 8>()(
442
17.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.9M
      block, scratch_space);
444
89.7M
  for (size_t iy = 0; iy < 4; iy++) {
445
646M
    for (size_t ix = 0; ix < 8; ix++) {
446
574M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
574M
    }
448
71.8M
  }
449
17.9M
  float block00 = coefficients[0] * 0.25f;
450
17.9M
  float block01 = coefficients[1];
451
17.9M
  float block10 = coefficients[8];
452
17.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.9M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
296k
                            float* JXL_RESTRICT coefficients) {
411
296k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
296k
  size_t afv_x = afv_kind & 1;
413
296k
  size_t afv_y = afv_kind / 2;
414
296k
  HWY_ALIGN float block[4 * 8] = {};
415
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.73M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.73M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.73M
    }
420
1.18M
  }
421
  // AFV coefficients in (even, even) positions.
422
296k
  HWY_ALIGN float coeff[4 * 4];
423
296k
  AFVDCT4x4(block, coeff);
424
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.73M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.73M
    }
428
1.18M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
296k
  ComputeScaledDCT<4, 4>()(
431
296k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
296k
              pixels_stride),
433
296k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.47M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.47M
    }
439
1.18M
  }
440
  // 4x8 DCT of the other half of the block.
441
296k
  ComputeScaledDCT<4, 8>()(
442
296k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
296k
      block, scratch_space);
444
1.48M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.47M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.47M
    }
448
1.18M
  }
449
296k
  float block00 = coefficients[0] * 0.25f;
450
296k
  float block01 = coefficients[1];
451
296k
  float block10 = coefficients[8];
452
296k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
296k
  coefficients[1] = (block00 - block01) * 0.5f;
454
296k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
296k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
522k
                            float* JXL_RESTRICT coefficients) {
411
522k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
522k
  size_t afv_x = afv_kind & 1;
413
522k
  size_t afv_y = afv_kind / 2;
414
522k
  HWY_ALIGN float block[4 * 8] = {};
415
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
416
10.4M
    for (size_t ix = 0; ix < 4; ix++) {
417
8.36M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
8.36M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
8.36M
    }
420
2.09M
  }
421
  // AFV coefficients in (even, even) positions.
422
522k
  HWY_ALIGN float coeff[4 * 4];
423
522k
  AFVDCT4x4(block, coeff);
424
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
425
10.4M
    for (size_t ix = 0; ix < 4; ix++) {
426
8.36M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
8.36M
    }
428
2.09M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
522k
  ComputeScaledDCT<4, 4>()(
431
522k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
522k
              pixels_stride),
433
522k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
436
18.8M
    for (size_t ix = 0; ix < 8; ix++) {
437
16.7M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
16.7M
    }
439
2.09M
  }
440
  // 4x8 DCT of the other half of the block.
441
522k
  ComputeScaledDCT<4, 8>()(
442
522k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
522k
      block, scratch_space);
444
2.61M
  for (size_t iy = 0; iy < 4; iy++) {
445
18.8M
    for (size_t ix = 0; ix < 8; ix++) {
446
16.7M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
16.7M
    }
448
2.09M
  }
449
522k
  float block00 = coefficients[0] * 0.25f;
450
522k
  float block01 = coefficients[1];
451
522k
  float block10 = coefficients[8];
452
522k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
522k
  coefficients[1] = (block00 - block01) * 0.5f;
454
522k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
522k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
261k
                            float* JXL_RESTRICT coefficients) {
411
261k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
261k
  size_t afv_x = afv_kind & 1;
413
261k
  size_t afv_y = afv_kind / 2;
414
261k
  HWY_ALIGN float block[4 * 8] = {};
415
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.23M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.18M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.18M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.18M
    }
420
1.04M
  }
421
  // AFV coefficients in (even, even) positions.
422
261k
  HWY_ALIGN float coeff[4 * 4];
423
261k
  AFVDCT4x4(block, coeff);
424
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.23M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.18M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.18M
    }
428
1.04M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
261k
  ComputeScaledDCT<4, 4>()(
431
261k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
261k
              pixels_stride),
433
261k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.41M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.36M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.36M
    }
439
1.04M
  }
440
  // 4x8 DCT of the other half of the block.
441
261k
  ComputeScaledDCT<4, 8>()(
442
261k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
261k
      block, scratch_space);
444
1.30M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.41M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.36M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.36M
    }
448
1.04M
  }
449
261k
  float block00 = coefficients[0] * 0.25f;
450
261k
  float block01 = coefficients[1];
451
261k
  float block10 = coefficients[8];
452
261k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
261k
  coefficients[1] = (block00 - block01) * 0.5f;
454
261k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
261k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
341k
                            float* JXL_RESTRICT coefficients) {
411
341k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
341k
  size_t afv_x = afv_kind & 1;
413
341k
  size_t afv_y = afv_kind / 2;
414
341k
  HWY_ALIGN float block[4 * 8] = {};
415
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.83M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.46M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.46M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.46M
    }
420
1.36M
  }
421
  // AFV coefficients in (even, even) positions.
422
341k
  HWY_ALIGN float coeff[4 * 4];
423
341k
  AFVDCT4x4(block, coeff);
424
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.83M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.46M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.46M
    }
428
1.36M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
341k
  ComputeScaledDCT<4, 4>()(
431
341k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
341k
              pixels_stride),
433
341k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
436
12.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
10.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
10.9M
    }
439
1.36M
  }
440
  // 4x8 DCT of the other half of the block.
441
341k
  ComputeScaledDCT<4, 8>()(
442
341k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
341k
      block, scratch_space);
444
1.70M
  for (size_t iy = 0; iy < 4; iy++) {
445
12.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
10.9M
    }
448
1.36M
  }
449
341k
  float block00 = coefficients[0] * 0.25f;
450
341k
  float block01 = coefficients[1];
451
341k
  float block10 = coefficients[8];
452
341k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
341k
  coefficients[1] = (block00 - block01) * 0.5f;
454
341k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
341k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
261M
                                          float* JXL_RESTRICT scratch_space) {
462
261M
  using Type = AcStrategyType;
463
261M
  switch (strategy) {
464
20.2M
    case Type::IDENTITY: {
465
60.7M
      for (size_t y = 0; y < 2; y++) {
466
121M
        for (size_t x = 0; x < 2; x++) {
467
80.9M
          float block_dc = 0;
468
404M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.61G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.29G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.29G
            }
472
323M
          }
473
80.9M
          block_dc *= 1.0f / 16;
474
404M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.61G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.29G
              if (ix == 1 && iy == 1) continue;
477
1.21G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.21G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.21G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.21G
            }
481
323M
          }
482
80.9M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
80.9M
          coefficients[y * 8 + x] = block_dc;
484
80.9M
        }
485
40.4M
      }
486
20.2M
      float block00 = coefficients[0];
487
20.2M
      float block01 = coefficients[1];
488
20.2M
      float block10 = coefficients[8];
489
20.2M
      float block11 = coefficients[9];
490
20.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
20.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
20.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
20.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
20.2M
      break;
495
0
    }
496
18.9M
    case Type::DCT8X4: {
497
56.9M
      for (size_t x = 0; x < 2; x++) {
498
37.9M
        HWY_ALIGN float block[4 * 8];
499
37.9M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
37.9M
                                 scratch_space);
501
189M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.36G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.21G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.21G
          }
506
151M
        }
507
37.9M
      }
508
18.9M
      float block0 = coefficients[0];
509
18.9M
      float block1 = coefficients[8];
510
18.9M
      coefficients[0] = (block0 + block1) * 0.5f;
511
18.9M
      coefficients[8] = (block0 - block1) * 0.5f;
512
18.9M
      break;
513
0
    }
514
18.6M
    case Type::DCT4X8: {
515
55.8M
      for (size_t y = 0; y < 2; y++) {
516
37.2M
        HWY_ALIGN float block[4 * 8];
517
37.2M
        ComputeScaledDCT<4, 8>()(
518
37.2M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
37.2M
            scratch_space);
520
186M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.34G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.19G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.19G
          }
524
148M
        }
525
37.2M
      }
526
18.6M
      float block0 = coefficients[0];
527
18.6M
      float block1 = coefficients[8];
528
18.6M
      coefficients[0] = (block0 + block1) * 0.5f;
529
18.6M
      coefficients[8] = (block0 - block1) * 0.5f;
530
18.6M
      break;
531
0
    }
532
17.9M
    case Type::DCT4X4: {
533
53.8M
      for (size_t y = 0; y < 2; y++) {
534
107M
        for (size_t x = 0; x < 2; x++) {
535
71.8M
          HWY_ALIGN float block[4 * 4];
536
71.8M
          ComputeScaledDCT<4, 4>()(
537
71.8M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
71.8M
              block, scratch_space);
539
359M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.43G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.14G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.14G
            }
543
287M
          }
544
71.8M
        }
545
35.9M
      }
546
17.9M
      float block00 = coefficients[0];
547
17.9M
      float block01 = coefficients[1];
548
17.9M
      float block10 = coefficients[8];
549
17.9M
      float block11 = coefficients[9];
550
17.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
17.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
17.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
17.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
17.9M
      break;
555
0
    }
556
20.2M
    case Type::DCT2X2: {
557
20.2M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
20.2M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
20.2M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
20.2M
      break;
561
0
    }
562
7.84M
    case Type::DCT16X16: {
563
7.84M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.84M
                                 scratch_space);
565
7.84M
      break;
566
0
    }
567
15.2M
    case Type::DCT16X8: {
568
15.2M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
15.2M
                                scratch_space);
570
15.2M
      break;
571
0
    }
572
15.5M
    case Type::DCT8X16: {
573
15.5M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
15.5M
                                scratch_space);
575
15.5M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.04M
    case Type::DCT32X16: {
588
3.04M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.04M
                                 scratch_space);
590
3.04M
      break;
591
0
    }
592
3.17M
    case Type::DCT16X32: {
593
3.17M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.17M
                                 scratch_space);
595
3.17M
      break;
596
0
    }
597
1.86M
    case Type::DCT32X32: {
598
1.86M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.86M
                                 scratch_space);
600
1.86M
      break;
601
0
    }
602
42.1M
    case Type::DCT: {
603
42.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
42.1M
                               scratch_space);
605
42.1M
      break;
606
0
    }
607
18.5M
    case Type::AFV0: {
608
18.5M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
18.5M
      break;
610
0
    }
611
19.0M
    case Type::AFV1: {
612
19.0M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
19.0M
      break;
614
0
    }
615
18.4M
    case Type::AFV2: {
616
18.4M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
18.4M
      break;
618
0
    }
619
18.6M
    case Type::AFV3: {
620
18.6M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
18.6M
      break;
622
0
    }
623
316k
    case Type::DCT64X64: {
624
316k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
316k
                                 scratch_space);
626
316k
      break;
627
0
    }
628
937k
    case Type::DCT64X32: {
629
937k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
937k
                                 scratch_space);
631
937k
      break;
632
0
    }
633
654k
    case Type::DCT32X64: {
634
654k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
654k
                                 scratch_space);
636
654k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
261M
  }
669
261M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
9.12M
                                          float* JXL_RESTRICT scratch_space) {
462
9.12M
  using Type = AcStrategyType;
463
9.12M
  switch (strategy) {
464
1.13M
    case Type::IDENTITY: {
465
3.41M
      for (size_t y = 0; y < 2; y++) {
466
6.83M
        for (size_t x = 0; x < 2; x++) {
467
4.55M
          float block_dc = 0;
468
22.7M
          for (size_t iy = 0; iy < 4; iy++) {
469
91.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
72.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
72.9M
            }
472
18.2M
          }
473
4.55M
          block_dc *= 1.0f / 16;
474
22.7M
          for (size_t iy = 0; iy < 4; iy++) {
475
91.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
72.9M
              if (ix == 1 && iy == 1) continue;
477
68.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
68.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
68.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
68.3M
            }
481
18.2M
          }
482
4.55M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.55M
          coefficients[y * 8 + x] = block_dc;
484
4.55M
        }
485
2.27M
      }
486
1.13M
      float block00 = coefficients[0];
487
1.13M
      float block01 = coefficients[1];
488
1.13M
      float block10 = coefficients[8];
489
1.13M
      float block11 = coefficients[9];
490
1.13M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.13M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.13M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.13M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.13M
      break;
495
0
    }
496
505k
    case Type::DCT8X4: {
497
1.51M
      for (size_t x = 0; x < 2; x++) {
498
1.01M
        HWY_ALIGN float block[4 * 8];
499
1.01M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
1.01M
                                 scratch_space);
501
5.05M
        for (size_t iy = 0; iy < 4; iy++) {
502
36.4M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
32.3M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
32.3M
          }
506
4.04M
        }
507
1.01M
      }
508
505k
      float block0 = coefficients[0];
509
505k
      float block1 = coefficients[8];
510
505k
      coefficients[0] = (block0 + block1) * 0.5f;
511
505k
      coefficients[8] = (block0 - block1) * 0.5f;
512
505k
      break;
513
0
    }
514
331k
    case Type::DCT4X8: {
515
993k
      for (size_t y = 0; y < 2; y++) {
516
662k
        HWY_ALIGN float block[4 * 8];
517
662k
        ComputeScaledDCT<4, 8>()(
518
662k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
662k
            scratch_space);
520
3.31M
        for (size_t iy = 0; iy < 4; iy++) {
521
23.8M
          for (size_t ix = 0; ix < 8; ix++) {
522
21.1M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
21.1M
          }
524
2.64M
        }
525
662k
      }
526
331k
      float block0 = coefficients[0];
527
331k
      float block1 = coefficients[8];
528
331k
      coefficients[0] = (block0 + block1) * 0.5f;
529
331k
      coefficients[8] = (block0 - block1) * 0.5f;
530
331k
      break;
531
0
    }
532
2.45k
    case Type::DCT4X4: {
533
7.36k
      for (size_t y = 0; y < 2; y++) {
534
14.7k
        for (size_t x = 0; x < 2; x++) {
535
9.81k
          HWY_ALIGN float block[4 * 4];
536
9.81k
          ComputeScaledDCT<4, 4>()(
537
9.81k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.81k
              block, scratch_space);
539
49.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
196k
            for (size_t ix = 0; ix < 4; ix++) {
541
157k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
157k
            }
543
39.2k
          }
544
9.81k
        }
545
4.90k
      }
546
2.45k
      float block00 = coefficients[0];
547
2.45k
      float block01 = coefficients[1];
548
2.45k
      float block10 = coefficients[8];
549
2.45k
      float block11 = coefficients[9];
550
2.45k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.45k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.45k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.45k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.45k
      break;
555
0
    }
556
1.13M
    case Type::DCT2X2: {
557
1.13M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.13M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.13M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.13M
      break;
561
0
    }
562
193k
    case Type::DCT16X16: {
563
193k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
193k
                                 scratch_space);
565
193k
      break;
566
0
    }
567
328k
    case Type::DCT16X8: {
568
328k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
328k
                                scratch_space);
570
328k
      break;
571
0
    }
572
454k
    case Type::DCT8X16: {
573
454k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
454k
                                scratch_space);
575
454k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
89.7k
    case Type::DCT32X16: {
588
89.7k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
89.7k
                                 scratch_space);
590
89.7k
      break;
591
0
    }
592
143k
    case Type::DCT16X32: {
593
143k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
143k
                                 scratch_space);
595
143k
      break;
596
0
    }
597
198k
    case Type::DCT32X32: {
598
198k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
198k
                                 scratch_space);
600
198k
      break;
601
0
    }
602
3.13M
    case Type::DCT: {
603
3.13M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
3.13M
                               scratch_space);
605
3.13M
      break;
606
0
    }
607
296k
    case Type::AFV0: {
608
296k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
296k
      break;
610
0
    }
611
522k
    case Type::AFV1: {
612
522k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
522k
      break;
614
0
    }
615
261k
    case Type::AFV2: {
616
261k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
261k
      break;
618
0
    }
619
341k
    case Type::AFV3: {
620
341k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
341k
      break;
622
0
    }
623
36.8k
    case Type::DCT64X64: {
624
36.8k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
36.8k
                                 scratch_space);
626
36.8k
      break;
627
0
    }
628
6.19k
    case Type::DCT64X32: {
629
6.19k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.19k
                                 scratch_space);
631
6.19k
      break;
632
0
    }
633
10.9k
    case Type::DCT32X64: {
634
10.9k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.9k
                                 scratch_space);
636
10.9k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
9.12M
  }
669
9.12M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
225M
                                          float* JXL_RESTRICT scratch_space) {
462
225M
  using Type = AcStrategyType;
463
225M
  switch (strategy) {
464
17.9M
    case Type::IDENTITY: {
465
53.8M
      for (size_t y = 0; y < 2; y++) {
466
107M
        for (size_t x = 0; x < 2; x++) {
467
71.8M
          float block_dc = 0;
468
359M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.43G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.14G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.14G
            }
472
287M
          }
473
71.8M
          block_dc *= 1.0f / 16;
474
359M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.43G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.14G
              if (ix == 1 && iy == 1) continue;
477
1.07G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.07G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.07G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.07G
            }
481
287M
          }
482
71.8M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
71.8M
          coefficients[y * 8 + x] = block_dc;
484
71.8M
        }
485
35.9M
      }
486
17.9M
      float block00 = coefficients[0];
487
17.9M
      float block01 = coefficients[1];
488
17.9M
      float block10 = coefficients[8];
489
17.9M
      float block11 = coefficients[9];
490
17.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
17.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
17.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
17.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
17.9M
      break;
495
0
    }
496
17.9M
    case Type::DCT8X4: {
497
53.8M
      for (size_t x = 0; x < 2; x++) {
498
35.9M
        HWY_ALIGN float block[4 * 8];
499
35.9M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
35.9M
                                 scratch_space);
501
179M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.29G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.14G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.14G
          }
506
143M
        }
507
35.9M
      }
508
17.9M
      float block0 = coefficients[0];
509
17.9M
      float block1 = coefficients[8];
510
17.9M
      coefficients[0] = (block0 + block1) * 0.5f;
511
17.9M
      coefficients[8] = (block0 - block1) * 0.5f;
512
17.9M
      break;
513
0
    }
514
17.9M
    case Type::DCT4X8: {
515
53.8M
      for (size_t y = 0; y < 2; y++) {
516
35.9M
        HWY_ALIGN float block[4 * 8];
517
35.9M
        ComputeScaledDCT<4, 8>()(
518
35.9M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
35.9M
            scratch_space);
520
179M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.29G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.14G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.14G
          }
524
143M
        }
525
35.9M
      }
526
17.9M
      float block0 = coefficients[0];
527
17.9M
      float block1 = coefficients[8];
528
17.9M
      coefficients[0] = (block0 + block1) * 0.5f;
529
17.9M
      coefficients[8] = (block0 - block1) * 0.5f;
530
17.9M
      break;
531
0
    }
532
17.9M
    case Type::DCT4X4: {
533
53.8M
      for (size_t y = 0; y < 2; y++) {
534
107M
        for (size_t x = 0; x < 2; x++) {
535
71.8M
          HWY_ALIGN float block[4 * 4];
536
71.8M
          ComputeScaledDCT<4, 4>()(
537
71.8M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
71.8M
              block, scratch_space);
539
359M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.43G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.14G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.14G
            }
543
287M
          }
544
71.8M
        }
545
35.9M
      }
546
17.9M
      float block00 = coefficients[0];
547
17.9M
      float block01 = coefficients[1];
548
17.9M
      float block10 = coefficients[8];
549
17.9M
      float block11 = coefficients[9];
550
17.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
17.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
17.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
17.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
17.9M
      break;
555
0
    }
556
17.9M
    case Type::DCT2X2: {
557
17.9M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
17.9M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
17.9M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
17.9M
      break;
561
0
    }
562
7.46M
    case Type::DCT16X16: {
563
7.46M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.46M
                                 scratch_space);
565
7.46M
      break;
566
0
    }
567
14.5M
    case Type::DCT16X8: {
568
14.5M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
14.5M
                                scratch_space);
570
14.5M
      break;
571
0
    }
572
14.6M
    case Type::DCT8X16: {
573
14.6M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
14.6M
                                scratch_space);
575
14.6M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.86M
    case Type::DCT32X16: {
588
2.86M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.86M
                                 scratch_space);
590
2.86M
      break;
591
0
    }
592
2.88M
    case Type::DCT16X32: {
593
2.88M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.88M
                                 scratch_space);
595
2.88M
      break;
596
0
    }
597
1.46M
    case Type::DCT32X32: {
598
1.46M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.46M
                                 scratch_space);
600
1.46M
      break;
601
0
    }
602
17.9M
    case Type::DCT: {
603
17.9M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
17.9M
                               scratch_space);
605
17.9M
      break;
606
0
    }
607
17.9M
    case Type::AFV0: {
608
17.9M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
17.9M
      break;
610
0
    }
611
17.9M
    case Type::AFV1: {
612
17.9M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
17.9M
      break;
614
0
    }
615
17.9M
    case Type::AFV2: {
616
17.9M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
17.9M
      break;
618
0
    }
619
17.9M
    case Type::AFV3: {
620
17.9M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
17.9M
      break;
622
0
    }
623
242k
    case Type::DCT64X64: {
624
242k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
242k
                                 scratch_space);
626
242k
      break;
627
0
    }
628
924k
    case Type::DCT64X32: {
629
924k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
924k
                                 scratch_space);
631
924k
      break;
632
0
    }
633
632k
    case Type::DCT32X64: {
634
632k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
632k
                                 scratch_space);
636
632k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
225M
  }
669
225M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
27.0M
                                          float* JXL_RESTRICT scratch_space) {
462
27.0M
  using Type = AcStrategyType;
463
27.0M
  switch (strategy) {
464
1.13M
    case Type::IDENTITY: {
465
3.41M
      for (size_t y = 0; y < 2; y++) {
466
6.83M
        for (size_t x = 0; x < 2; x++) {
467
4.55M
          float block_dc = 0;
468
22.7M
          for (size_t iy = 0; iy < 4; iy++) {
469
91.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
72.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
72.9M
            }
472
18.2M
          }
473
4.55M
          block_dc *= 1.0f / 16;
474
22.7M
          for (size_t iy = 0; iy < 4; iy++) {
475
91.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
72.9M
              if (ix == 1 && iy == 1) continue;
477
68.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
68.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
68.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
68.3M
            }
481
18.2M
          }
482
4.55M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.55M
          coefficients[y * 8 + x] = block_dc;
484
4.55M
        }
485
2.27M
      }
486
1.13M
      float block00 = coefficients[0];
487
1.13M
      float block01 = coefficients[1];
488
1.13M
      float block10 = coefficients[8];
489
1.13M
      float block11 = coefficients[9];
490
1.13M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.13M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.13M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.13M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.13M
      break;
495
0
    }
496
505k
    case Type::DCT8X4: {
497
1.51M
      for (size_t x = 0; x < 2; x++) {
498
1.01M
        HWY_ALIGN float block[4 * 8];
499
1.01M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
1.01M
                                 scratch_space);
501
5.05M
        for (size_t iy = 0; iy < 4; iy++) {
502
36.4M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
32.3M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
32.3M
          }
506
4.04M
        }
507
1.01M
      }
508
505k
      float block0 = coefficients[0];
509
505k
      float block1 = coefficients[8];
510
505k
      coefficients[0] = (block0 + block1) * 0.5f;
511
505k
      coefficients[8] = (block0 - block1) * 0.5f;
512
505k
      break;
513
0
    }
514
331k
    case Type::DCT4X8: {
515
993k
      for (size_t y = 0; y < 2; y++) {
516
662k
        HWY_ALIGN float block[4 * 8];
517
662k
        ComputeScaledDCT<4, 8>()(
518
662k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
662k
            scratch_space);
520
3.31M
        for (size_t iy = 0; iy < 4; iy++) {
521
23.8M
          for (size_t ix = 0; ix < 8; ix++) {
522
21.1M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
21.1M
          }
524
2.64M
        }
525
662k
      }
526
331k
      float block0 = coefficients[0];
527
331k
      float block1 = coefficients[8];
528
331k
      coefficients[0] = (block0 + block1) * 0.5f;
529
331k
      coefficients[8] = (block0 - block1) * 0.5f;
530
331k
      break;
531
0
    }
532
2.45k
    case Type::DCT4X4: {
533
7.36k
      for (size_t y = 0; y < 2; y++) {
534
14.7k
        for (size_t x = 0; x < 2; x++) {
535
9.81k
          HWY_ALIGN float block[4 * 4];
536
9.81k
          ComputeScaledDCT<4, 4>()(
537
9.81k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.81k
              block, scratch_space);
539
49.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
196k
            for (size_t ix = 0; ix < 4; ix++) {
541
157k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
157k
            }
543
39.2k
          }
544
9.81k
        }
545
4.90k
      }
546
2.45k
      float block00 = coefficients[0];
547
2.45k
      float block01 = coefficients[1];
548
2.45k
      float block10 = coefficients[8];
549
2.45k
      float block11 = coefficients[9];
550
2.45k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.45k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.45k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.45k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.45k
      break;
555
0
    }
556
1.13M
    case Type::DCT2X2: {
557
1.13M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.13M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.13M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.13M
      break;
561
0
    }
562
193k
    case Type::DCT16X16: {
563
193k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
193k
                                 scratch_space);
565
193k
      break;
566
0
    }
567
328k
    case Type::DCT16X8: {
568
328k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
328k
                                scratch_space);
570
328k
      break;
571
0
    }
572
454k
    case Type::DCT8X16: {
573
454k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
454k
                                scratch_space);
575
454k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
89.7k
    case Type::DCT32X16: {
588
89.7k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
89.7k
                                 scratch_space);
590
89.7k
      break;
591
0
    }
592
143k
    case Type::DCT16X32: {
593
143k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
143k
                                 scratch_space);
595
143k
      break;
596
0
    }
597
198k
    case Type::DCT32X32: {
598
198k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
198k
                                 scratch_space);
600
198k
      break;
601
0
    }
602
21.0M
    case Type::DCT: {
603
21.0M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
21.0M
                               scratch_space);
605
21.0M
      break;
606
0
    }
607
296k
    case Type::AFV0: {
608
296k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
296k
      break;
610
0
    }
611
522k
    case Type::AFV1: {
612
522k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
522k
      break;
614
0
    }
615
261k
    case Type::AFV2: {
616
261k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
261k
      break;
618
0
    }
619
341k
    case Type::AFV3: {
620
341k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
341k
      break;
622
0
    }
623
36.8k
    case Type::DCT64X64: {
624
36.8k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
36.8k
                                 scratch_space);
626
36.8k
      break;
627
0
    }
628
6.19k
    case Type::DCT64X32: {
629
6.19k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.19k
                                 scratch_space);
631
6.19k
      break;
632
0
    }
633
10.9k
    case Type::DCT32X64: {
634
10.9k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.9k
                                 scratch_space);
636
10.9k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
27.0M
  }
669
27.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
36.2M
                                              float* scratch_space) {
676
36.2M
  using Type = AcStrategyType;
677
36.2M
  switch (strategy) {
678
656k
    case Type::DCT16X8: {
679
656k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
656k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
656k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
656k
      break;
683
0
    }
684
909k
    case Type::DCT8X16: {
685
909k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
909k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
909k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
909k
      break;
689
0
    }
690
386k
    case Type::DCT16X16: {
691
386k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
386k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
386k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
386k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
179k
    case Type::DCT32X16: {
709
179k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
179k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
179k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
179k
      break;
713
0
    }
714
287k
    case Type::DCT16X32: {
715
287k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
287k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
287k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
287k
      break;
719
0
    }
720
397k
    case Type::DCT32X32: {
721
397k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
397k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
397k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
397k
      break;
725
0
    }
726
12.3k
    case Type::DCT64X32: {
727
12.3k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.3k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.3k
      break;
731
0
    }
732
21.8k
    case Type::DCT32X64: {
733
21.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
21.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
21.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
21.8k
      break;
737
0
    }
738
73.7k
    case Type::DCT64X64: {
739
73.7k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
73.7k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
73.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
73.7k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
24.2M
    case Type::DCT:
787
26.4M
    case Type::DCT2X2:
788
26.4M
    case Type::DCT4X4:
789
27.1M
    case Type::DCT4X8:
790
28.1M
    case Type::DCT8X4:
791
28.7M
    case Type::AFV0:
792
29.7M
    case Type::AFV1:
793
30.3M
    case Type::AFV2:
794
31.0M
    case Type::AFV3:
795
33.2M
    case Type::IDENTITY:
796
33.2M
      dc[0] = block[0];
797
33.2M
      break;
798
36.2M
  }
799
36.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
9.12M
                                              float* scratch_space) {
676
9.12M
  using Type = AcStrategyType;
677
9.12M
  switch (strategy) {
678
328k
    case Type::DCT16X8: {
679
328k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
328k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
328k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
328k
      break;
683
0
    }
684
454k
    case Type::DCT8X16: {
685
454k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
454k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
454k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
454k
      break;
689
0
    }
690
193k
    case Type::DCT16X16: {
691
193k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
193k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
193k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
193k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
89.7k
    case Type::DCT32X16: {
709
89.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
89.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
89.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
89.7k
      break;
713
0
    }
714
143k
    case Type::DCT16X32: {
715
143k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
143k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
143k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
143k
      break;
719
0
    }
720
198k
    case Type::DCT32X32: {
721
198k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
198k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
198k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
198k
      break;
725
0
    }
726
6.19k
    case Type::DCT64X32: {
727
6.19k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.19k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.19k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.19k
      break;
731
0
    }
732
10.9k
    case Type::DCT32X64: {
733
10.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.9k
      break;
737
0
    }
738
36.8k
    case Type::DCT64X64: {
739
36.8k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
36.8k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
36.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
36.8k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
3.13M
    case Type::DCT:
787
4.26M
    case Type::DCT2X2:
788
4.26M
    case Type::DCT4X4:
789
4.59M
    case Type::DCT4X8:
790
5.10M
    case Type::DCT8X4:
791
5.39M
    case Type::AFV0:
792
5.92M
    case Type::AFV1:
793
6.18M
    case Type::AFV2:
794
6.52M
    case Type::AFV3:
795
7.66M
    case Type::IDENTITY:
796
7.66M
      dc[0] = block[0];
797
7.66M
      break;
798
9.12M
  }
799
9.12M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
27.0M
                                              float* scratch_space) {
676
27.0M
  using Type = AcStrategyType;
677
27.0M
  switch (strategy) {
678
328k
    case Type::DCT16X8: {
679
328k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
328k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
328k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
328k
      break;
683
0
    }
684
454k
    case Type::DCT8X16: {
685
454k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
454k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
454k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
454k
      break;
689
0
    }
690
193k
    case Type::DCT16X16: {
691
193k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
193k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
193k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
193k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
89.7k
    case Type::DCT32X16: {
709
89.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
89.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
89.7k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
89.7k
      break;
713
0
    }
714
143k
    case Type::DCT16X32: {
715
143k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
143k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
143k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
143k
      break;
719
0
    }
720
198k
    case Type::DCT32X32: {
721
198k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
198k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
198k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
198k
      break;
725
0
    }
726
6.19k
    case Type::DCT64X32: {
727
6.19k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.19k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.19k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.19k
      break;
731
0
    }
732
10.9k
    case Type::DCT32X64: {
733
10.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.9k
      break;
737
0
    }
738
36.8k
    case Type::DCT64X64: {
739
36.8k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
36.8k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
36.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
36.8k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
21.0M
    case Type::DCT:
787
22.2M
    case Type::DCT2X2:
788
22.2M
    case Type::DCT4X4:
789
22.5M
    case Type::DCT4X8:
790
23.0M
    case Type::DCT8X4:
791
23.3M
    case Type::AFV0:
792
23.8M
    case Type::AFV1:
793
24.1M
    case Type::AFV2:
794
24.4M
    case Type::AFV3:
795
25.6M
    case Type::IDENTITY:
796
25.6M
      dc[0] = block[0];
797
25.6M
      break;
798
27.0M
  }
799
27.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_