Coverage Report

Created: 2025-07-23 08:18

/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.68M
                                   const size_t output_stride, float* scratch) {
40
1.68M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.68M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.68M
  float* block = scratch;
43
1.68M
  if (ROWS < COLS) {
44
1.61M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.61M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.71M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.71M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.71M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.71M
      }
50
899k
    }
51
971k
  } else {
52
2.85M
    for (size_t y = 0; y < LF_COLS; y++) {
53
8.70M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
6.81M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
6.81M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
6.81M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
6.81M
      }
58
1.88M
    }
59
971k
  }
60
61
1.68M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.68M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.68M
                                  scratch_space);
64
1.68M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
236k
                                   const size_t output_stride, float* scratch) {
40
236k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
236k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
236k
  float* block = scratch;
43
236k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
236k
  } else {
52
472k
    for (size_t y = 0; y < LF_COLS; y++) {
53
708k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
472k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
472k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
472k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
472k
      }
58
236k
    }
59
236k
  }
60
61
236k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
236k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
236k
                                  scratch_space);
64
236k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
279k
                                   const size_t output_stride, float* scratch) {
40
279k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
279k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
279k
  float* block = scratch;
43
279k
  if (ROWS < COLS) {
44
559k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
838k
      for (size_t x = 0; x < LF_COLS; x++) {
46
559k
        block[y * COLS + x] = input[y * input_stride + x] *
47
559k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
559k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
559k
      }
50
279k
    }
51
279k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
279k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
279k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
279k
                                  scratch_space);
64
279k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
145k
                                   const size_t output_stride, float* scratch) {
40
145k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
145k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
145k
  float* block = scratch;
43
145k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
145k
  } else {
52
436k
    for (size_t y = 0; y < LF_COLS; y++) {
53
872k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
581k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
581k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
581k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
581k
      }
58
290k
    }
59
145k
  }
60
61
145k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
145k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
145k
                                  scratch_space);
64
145k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
41.0k
                                   const size_t output_stride, float* scratch) {
40
41.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
41.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
41.0k
  float* block = scratch;
43
41.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
41.0k
  } else {
52
123k
    for (size_t y = 0; y < LF_COLS; y++) {
53
410k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
328k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
328k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
328k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
328k
      }
58
82.0k
    }
59
41.0k
  }
60
61
41.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
41.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
41.0k
                                  scratch_space);
64
41.0k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
70.2k
                                   const size_t output_stride, float* scratch) {
40
70.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
70.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
70.2k
  float* block = scratch;
43
70.2k
  if (ROWS < COLS) {
44
210k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
702k
      for (size_t x = 0; x < LF_COLS; x++) {
46
562k
        block[y * COLS + x] = input[y * input_stride + x] *
47
562k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
562k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
562k
      }
50
140k
    }
51
70.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
70.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
70.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
70.2k
                                  scratch_space);
64
70.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
38.9k
                                   const size_t output_stride, float* scratch) {
40
38.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
38.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
38.9k
  float* block = scratch;
43
38.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
38.9k
  } else {
52
194k
    for (size_t y = 0; y < LF_COLS; y++) {
53
778k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
622k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
622k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
622k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
622k
      }
58
155k
    }
59
38.9k
  }
60
61
38.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
38.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
38.9k
                                  scratch_space);
64
38.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
4.76k
                                   const size_t output_stride, float* scratch) {
40
4.76k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
4.76k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
4.76k
  float* block = scratch;
43
4.76k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
4.76k
  } else {
52
23.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
171k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
152k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
152k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
152k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
152k
      }
58
19.0k
    }
59
4.76k
  }
60
61
4.76k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
4.76k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
4.76k
                                  scratch_space);
64
4.76k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.35k
                                   const size_t output_stride, float* scratch) {
40
7.35k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.35k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.35k
  float* block = scratch;
43
7.35k
  if (ROWS < COLS) {
44
36.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
264k
      for (size_t x = 0; x < LF_COLS; x++) {
46
235k
        block[y * COLS + x] = input[y * input_stride + x] *
47
235k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
235k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
235k
      }
50
29.4k
    }
51
7.35k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.35k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.35k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.35k
                                  scratch_space);
64
7.35k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.5k
                                   const size_t output_stride, float* scratch) {
40
19.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.5k
  float* block = scratch;
43
19.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.5k
  } else {
52
176k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.40M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.25M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.25M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.25M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.25M
      }
58
156k
    }
59
19.5k
  }
60
61
19.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.5k
                                  scratch_space);
64
19.5k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
236k
                                   const size_t output_stride, float* scratch) {
40
236k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
236k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
236k
  float* block = scratch;
43
236k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
236k
  } else {
52
472k
    for (size_t y = 0; y < LF_COLS; y++) {
53
708k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
472k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
472k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
472k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
472k
      }
58
236k
    }
59
236k
  }
60
61
236k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
236k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
236k
                                  scratch_space);
64
236k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
279k
                                   const size_t output_stride, float* scratch) {
40
279k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
279k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
279k
  float* block = scratch;
43
279k
  if (ROWS < COLS) {
44
559k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
838k
      for (size_t x = 0; x < LF_COLS; x++) {
46
559k
        block[y * COLS + x] = input[y * input_stride + x] *
47
559k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
559k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
559k
      }
50
279k
    }
51
279k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
279k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
279k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
279k
                                  scratch_space);
64
279k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
145k
                                   const size_t output_stride, float* scratch) {
40
145k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
145k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
145k
  float* block = scratch;
43
145k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
145k
  } else {
52
436k
    for (size_t y = 0; y < LF_COLS; y++) {
53
872k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
581k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
581k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
581k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
581k
      }
58
290k
    }
59
145k
  }
60
61
145k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
145k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
145k
                                  scratch_space);
64
145k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
41.0k
                                   const size_t output_stride, float* scratch) {
40
41.0k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
41.0k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
41.0k
  float* block = scratch;
43
41.0k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
41.0k
  } else {
52
123k
    for (size_t y = 0; y < LF_COLS; y++) {
53
410k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
328k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
328k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
328k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
328k
      }
58
82.0k
    }
59
41.0k
  }
60
61
41.0k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
41.0k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
41.0k
                                  scratch_space);
64
41.0k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
70.2k
                                   const size_t output_stride, float* scratch) {
40
70.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
70.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
70.2k
  float* block = scratch;
43
70.2k
  if (ROWS < COLS) {
44
210k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
702k
      for (size_t x = 0; x < LF_COLS; x++) {
46
562k
        block[y * COLS + x] = input[y * input_stride + x] *
47
562k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
562k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
562k
      }
50
140k
    }
51
70.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
70.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
70.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
70.2k
                                  scratch_space);
64
70.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
38.9k
                                   const size_t output_stride, float* scratch) {
40
38.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
38.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
38.9k
  float* block = scratch;
43
38.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
38.9k
  } else {
52
194k
    for (size_t y = 0; y < LF_COLS; y++) {
53
778k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
622k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
622k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
622k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
622k
      }
58
155k
    }
59
38.9k
  }
60
61
38.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
38.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
38.9k
                                  scratch_space);
64
38.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
4.76k
                                   const size_t output_stride, float* scratch) {
40
4.76k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
4.76k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
4.76k
  float* block = scratch;
43
4.76k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
4.76k
  } else {
52
23.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
171k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
152k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
152k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
152k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
152k
      }
58
19.0k
    }
59
4.76k
  }
60
61
4.76k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
4.76k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
4.76k
                                  scratch_space);
64
4.76k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.35k
                                   const size_t output_stride, float* scratch) {
40
7.35k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.35k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.35k
  float* block = scratch;
43
7.35k
  if (ROWS < COLS) {
44
36.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
264k
      for (size_t x = 0; x < LF_COLS; x++) {
46
235k
        block[y * COLS + x] = input[y * input_stride + x] *
47
235k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
235k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
235k
      }
50
29.4k
    }
51
7.35k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.35k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.35k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.35k
                                  scratch_space);
64
7.35k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
19.5k
                                   const size_t output_stride, float* scratch) {
40
19.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
19.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
19.5k
  float* block = scratch;
43
19.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
19.5k
  } else {
52
176k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.40M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.25M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.25M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.25M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.25M
      }
58
156k
    }
59
19.5k
  }
60
61
19.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
19.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
19.5k
                                  scratch_space);
64
19.5k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
31.0M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
31.0M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
31.0M
  static_assert(S % 2 == 0, "S should be even");
70
31.0M
  float temp[kDCTBlockSize];
71
31.0M
  constexpr size_t num_2x2 = S / 2;
72
103M
  for (size_t y = 0; y < num_2x2; y++) {
73
289M
    for (size_t x = 0; x < num_2x2; x++) {
74
217M
      float c00 = block[y * 2 * stride + x * 2];
75
217M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
217M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
217M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
217M
      float r00 = c00 + c01 + c10 + c11;
79
217M
      float r01 = c00 + c01 - c10 - c11;
80
217M
      float r10 = c00 - c01 + c10 - c11;
81
217M
      float r11 = c00 - c01 - c10 + c11;
82
217M
      r00 *= 0.25f;
83
217M
      r01 *= 0.25f;
84
217M
      r10 *= 0.25f;
85
217M
      r11 *= 0.25f;
86
217M
      temp[y * kBlockDim + x] = r00;
87
217M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
217M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
217M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
217M
    }
91
72.4M
  }
92
175M
  for (size_t y = 0; y < S; y++) {
93
1.01G
    for (size_t x = 0; x < S; x++) {
94
869M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
869M
    }
96
144M
  }
97
31.0M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
4.13M
  for (size_t y = 0; y < num_2x2; y++) {
73
16.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.2M
      float c00 = block[y * 2 * stride + x * 2];
75
13.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
13.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
13.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
13.2M
      float r00 = c00 + c01 + c10 + c11;
79
13.2M
      float r01 = c00 + c01 - c10 - c11;
80
13.2M
      float r10 = c00 - c01 + c10 - c11;
81
13.2M
      float r11 = c00 - c01 - c10 + c11;
82
13.2M
      r00 *= 0.25f;
83
13.2M
      r01 *= 0.25f;
84
13.2M
      r10 *= 0.25f;
85
13.2M
      r11 *= 0.25f;
86
13.2M
      temp[y * kBlockDim + x] = r00;
87
13.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
13.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
13.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
13.2M
    }
91
3.30M
  }
92
7.43M
  for (size_t y = 0; y < S; y++) {
93
59.4M
    for (size_t x = 0; x < S; x++) {
94
52.8M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
52.8M
    }
96
6.61M
  }
97
826k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
2.47M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.95M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.30M
      float c00 = block[y * 2 * stride + x * 2];
75
3.30M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.30M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.30M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.30M
      float r00 = c00 + c01 + c10 + c11;
79
3.30M
      float r01 = c00 + c01 - c10 - c11;
80
3.30M
      float r10 = c00 - c01 + c10 - c11;
81
3.30M
      float r11 = c00 - c01 - c10 + c11;
82
3.30M
      r00 *= 0.25f;
83
3.30M
      r01 *= 0.25f;
84
3.30M
      r10 *= 0.25f;
85
3.30M
      r11 *= 0.25f;
86
3.30M
      temp[y * kBlockDim + x] = r00;
87
3.30M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.30M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.30M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.30M
    }
91
1.65M
  }
92
4.13M
  for (size_t y = 0; y < S; y++) {
93
16.5M
    for (size_t x = 0; x < S; x++) {
94
13.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
13.2M
    }
96
3.30M
  }
97
826k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
1.65M
  for (size_t y = 0; y < num_2x2; y++) {
73
1.65M
    for (size_t x = 0; x < num_2x2; x++) {
74
826k
      float c00 = block[y * 2 * stride + x * 2];
75
826k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
826k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
826k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
826k
      float r00 = c00 + c01 + c10 + c11;
79
826k
      float r01 = c00 + c01 - c10 - c11;
80
826k
      float r10 = c00 - c01 + c10 - c11;
81
826k
      float r11 = c00 - c01 - c10 + c11;
82
826k
      r00 *= 0.25f;
83
826k
      r01 *= 0.25f;
84
826k
      r10 *= 0.25f;
85
826k
      r11 *= 0.25f;
86
826k
      temp[y * kBlockDim + x] = r00;
87
826k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
826k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
826k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
826k
    }
91
826k
  }
92
2.47M
  for (size_t y = 0; y < S; y++) {
93
4.95M
    for (size_t x = 0; x < S; x++) {
94
3.30M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.30M
    }
96
1.65M
  }
97
826k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
43.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
173M
    for (size_t x = 0; x < num_2x2; x++) {
74
139M
      float c00 = block[y * 2 * stride + x * 2];
75
139M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
139M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
139M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
139M
      float r00 = c00 + c01 + c10 + c11;
79
139M
      float r01 = c00 + c01 - c10 - c11;
80
139M
      float r10 = c00 - c01 + c10 - c11;
81
139M
      float r11 = c00 - c01 - c10 + c11;
82
139M
      r00 *= 0.25f;
83
139M
      r01 *= 0.25f;
84
139M
      r10 *= 0.25f;
85
139M
      r11 *= 0.25f;
86
139M
      temp[y * kBlockDim + x] = r00;
87
139M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
139M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
139M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
139M
    }
91
34.7M
  }
92
78.2M
  for (size_t y = 0; y < S; y++) {
93
626M
    for (size_t x = 0; x < S; x++) {
94
556M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
556M
    }
96
69.5M
  }
97
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
26.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
52.1M
    for (size_t x = 0; x < num_2x2; x++) {
74
34.7M
      float c00 = block[y * 2 * stride + x * 2];
75
34.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
34.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
34.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
34.7M
      float r00 = c00 + c01 + c10 + c11;
79
34.7M
      float r01 = c00 + c01 - c10 - c11;
80
34.7M
      float r10 = c00 - c01 + c10 - c11;
81
34.7M
      float r11 = c00 - c01 - c10 + c11;
82
34.7M
      r00 *= 0.25f;
83
34.7M
      r01 *= 0.25f;
84
34.7M
      r10 *= 0.25f;
85
34.7M
      r11 *= 0.25f;
86
34.7M
      temp[y * kBlockDim + x] = r00;
87
34.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
34.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
34.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
34.7M
    }
91
17.3M
  }
92
43.4M
  for (size_t y = 0; y < S; y++) {
93
173M
    for (size_t x = 0; x < S; x++) {
94
139M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
139M
    }
96
34.7M
  }
97
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
8.69M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
8.69M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
8.69M
  static_assert(S % 2 == 0, "S should be even");
70
8.69M
  float temp[kDCTBlockSize];
71
8.69M
  constexpr size_t num_2x2 = S / 2;
72
17.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
17.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
8.69M
      float c00 = block[y * 2 * stride + x * 2];
75
8.69M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
8.69M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
8.69M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
8.69M
      float r00 = c00 + c01 + c10 + c11;
79
8.69M
      float r01 = c00 + c01 - c10 - c11;
80
8.69M
      float r10 = c00 - c01 + c10 - c11;
81
8.69M
      float r11 = c00 - c01 - c10 + c11;
82
8.69M
      r00 *= 0.25f;
83
8.69M
      r01 *= 0.25f;
84
8.69M
      r10 *= 0.25f;
85
8.69M
      r11 *= 0.25f;
86
8.69M
      temp[y * kBlockDim + x] = r00;
87
8.69M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
8.69M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
8.69M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
8.69M
    }
91
8.69M
  }
92
26.0M
  for (size_t y = 0; y < S; y++) {
93
52.1M
    for (size_t x = 0; x < S; x++) {
94
34.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
34.7M
    }
96
17.3M
  }
97
8.69M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
4.13M
  for (size_t y = 0; y < num_2x2; y++) {
73
16.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
13.2M
      float c00 = block[y * 2 * stride + x * 2];
75
13.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
13.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
13.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
13.2M
      float r00 = c00 + c01 + c10 + c11;
79
13.2M
      float r01 = c00 + c01 - c10 - c11;
80
13.2M
      float r10 = c00 - c01 + c10 - c11;
81
13.2M
      float r11 = c00 - c01 - c10 + c11;
82
13.2M
      r00 *= 0.25f;
83
13.2M
      r01 *= 0.25f;
84
13.2M
      r10 *= 0.25f;
85
13.2M
      r11 *= 0.25f;
86
13.2M
      temp[y * kBlockDim + x] = r00;
87
13.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
13.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
13.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
13.2M
    }
91
3.30M
  }
92
7.43M
  for (size_t y = 0; y < S; y++) {
93
59.4M
    for (size_t x = 0; x < S; x++) {
94
52.8M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
52.8M
    }
96
6.61M
  }
97
826k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
2.47M
  for (size_t y = 0; y < num_2x2; y++) {
73
4.95M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.30M
      float c00 = block[y * 2 * stride + x * 2];
75
3.30M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.30M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.30M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.30M
      float r00 = c00 + c01 + c10 + c11;
79
3.30M
      float r01 = c00 + c01 - c10 - c11;
80
3.30M
      float r10 = c00 - c01 + c10 - c11;
81
3.30M
      float r11 = c00 - c01 - c10 + c11;
82
3.30M
      r00 *= 0.25f;
83
3.30M
      r01 *= 0.25f;
84
3.30M
      r10 *= 0.25f;
85
3.30M
      r11 *= 0.25f;
86
3.30M
      temp[y * kBlockDim + x] = r00;
87
3.30M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.30M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.30M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.30M
    }
91
1.65M
  }
92
4.13M
  for (size_t y = 0; y < S; y++) {
93
16.5M
    for (size_t x = 0; x < S; x++) {
94
13.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
13.2M
    }
96
3.30M
  }
97
826k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
826k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
826k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
826k
  static_assert(S % 2 == 0, "S should be even");
70
826k
  float temp[kDCTBlockSize];
71
826k
  constexpr size_t num_2x2 = S / 2;
72
1.65M
  for (size_t y = 0; y < num_2x2; y++) {
73
1.65M
    for (size_t x = 0; x < num_2x2; x++) {
74
826k
      float c00 = block[y * 2 * stride + x * 2];
75
826k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
826k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
826k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
826k
      float r00 = c00 + c01 + c10 + c11;
79
826k
      float r01 = c00 + c01 - c10 - c11;
80
826k
      float r10 = c00 - c01 + c10 - c11;
81
826k
      float r11 = c00 - c01 - c10 + c11;
82
826k
      r00 *= 0.25f;
83
826k
      r01 *= 0.25f;
84
826k
      r10 *= 0.25f;
85
826k
      r11 *= 0.25f;
86
826k
      temp[y * kBlockDim + x] = r00;
87
826k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
826k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
826k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
826k
    }
91
826k
  }
92
2.47M
  for (size_t y = 0; y < S; y++) {
93
4.95M
    for (size_t x = 0; x < S; x++) {
94
3.30M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.30M
    }
96
1.65M
  }
97
826k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
36.1M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
36.1M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
36.1M
      {
102
36.1M
          0.2500000000000000,
103
36.1M
          0.8769029297991420f,
104
36.1M
          0.0000000000000000,
105
36.1M
          0.0000000000000000,
106
36.1M
          0.0000000000000000,
107
36.1M
          -0.4105377591765233f,
108
36.1M
          0.0000000000000000,
109
36.1M
          0.0000000000000000,
110
36.1M
          0.0000000000000000,
111
36.1M
          0.0000000000000000,
112
36.1M
          0.0000000000000000,
113
36.1M
          0.0000000000000000,
114
36.1M
          0.0000000000000000,
115
36.1M
          0.0000000000000000,
116
36.1M
          0.0000000000000000,
117
36.1M
          0.0000000000000000,
118
36.1M
      },
119
36.1M
      {
120
36.1M
          0.2500000000000000,
121
36.1M
          0.2206518106944235f,
122
36.1M
          0.0000000000000000,
123
36.1M
          0.0000000000000000,
124
36.1M
          -0.7071067811865474f,
125
36.1M
          0.6235485373547691f,
126
36.1M
          0.0000000000000000,
127
36.1M
          0.0000000000000000,
128
36.1M
          0.0000000000000000,
129
36.1M
          0.0000000000000000,
130
36.1M
          0.0000000000000000,
131
36.1M
          0.0000000000000000,
132
36.1M
          0.0000000000000000,
133
36.1M
          0.0000000000000000,
134
36.1M
          0.0000000000000000,
135
36.1M
          0.0000000000000000,
136
36.1M
      },
137
36.1M
      {
138
36.1M
          0.2500000000000000,
139
36.1M
          -0.1014005039375376f,
140
36.1M
          0.4067007583026075f,
141
36.1M
          -0.2125574805828875f,
142
36.1M
          0.0000000000000000,
143
36.1M
          -0.0643507165794627f,
144
36.1M
          -0.4517556589999482f,
145
36.1M
          -0.3046847507248690f,
146
36.1M
          0.3017929516615495f,
147
36.1M
          0.4082482904638627f,
148
36.1M
          0.1747866975480809f,
149
36.1M
          -0.2110560104933578f,
150
36.1M
          -0.1426608480880726f,
151
36.1M
          -0.1381354035075859f,
152
36.1M
          -0.1743760259965107f,
153
36.1M
          0.1135498731499434f,
154
36.1M
      },
155
36.1M
      {
156
36.1M
          0.2500000000000000,
157
36.1M
          -0.1014005039375375f,
158
36.1M
          0.4444481661973445f,
159
36.1M
          0.3085497062849767f,
160
36.1M
          0.0000000000000000f,
161
36.1M
          -0.0643507165794627f,
162
36.1M
          0.1585450355184006f,
163
36.1M
          0.5112616136591823f,
164
36.1M
          0.2579236279634118f,
165
36.1M
          0.0000000000000000,
166
36.1M
          0.0812611176717539f,
167
36.1M
          0.1856718091610980f,
168
36.1M
          -0.3416446842253372f,
169
36.1M
          0.3302282550303788f,
170
36.1M
          0.0702790691196284f,
171
36.1M
          -0.0741750459581035f,
172
36.1M
      },
173
36.1M
      {
174
36.1M
          0.2500000000000000,
175
36.1M
          0.2206518106944236f,
176
36.1M
          0.0000000000000000,
177
36.1M
          0.0000000000000000,
178
36.1M
          0.7071067811865476f,
179
36.1M
          0.6235485373547694f,
180
36.1M
          0.0000000000000000,
181
36.1M
          0.0000000000000000,
182
36.1M
          0.0000000000000000,
183
36.1M
          0.0000000000000000,
184
36.1M
          0.0000000000000000,
185
36.1M
          0.0000000000000000,
186
36.1M
          0.0000000000000000,
187
36.1M
          0.0000000000000000,
188
36.1M
          0.0000000000000000,
189
36.1M
          0.0000000000000000,
190
36.1M
      },
191
36.1M
      {
192
36.1M
          0.2500000000000000,
193
36.1M
          -0.1014005039375378f,
194
36.1M
          0.0000000000000000,
195
36.1M
          0.4706702258572536f,
196
36.1M
          0.0000000000000000,
197
36.1M
          -0.0643507165794628f,
198
36.1M
          -0.0403851516082220f,
199
36.1M
          0.0000000000000000,
200
36.1M
          0.1627234014286620f,
201
36.1M
          0.0000000000000000,
202
36.1M
          0.0000000000000000,
203
36.1M
          0.0000000000000000,
204
36.1M
          0.7367497537172237f,
205
36.1M
          0.0875511500058708f,
206
36.1M
          -0.2921026642334881f,
207
36.1M
          0.1940289303259434f,
208
36.1M
      },
209
36.1M
      {
210
36.1M
          0.2500000000000000,
211
36.1M
          -0.1014005039375377f,
212
36.1M
          0.1957439937204294f,
213
36.1M
          -0.1621205195722993f,
214
36.1M
          0.0000000000000000,
215
36.1M
          -0.0643507165794628f,
216
36.1M
          0.0074182263792424f,
217
36.1M
          -0.2904801297289980f,
218
36.1M
          0.0952002265347504f,
219
36.1M
          0.0000000000000000,
220
36.1M
          -0.3675398009862027f,
221
36.1M
          0.4921585901373873f,
222
36.1M
          0.2462710772207515f,
223
36.1M
          -0.0794670660590957f,
224
36.1M
          0.3623817333531167f,
225
36.1M
          -0.4351904965232280f,
226
36.1M
      },
227
36.1M
      {
228
36.1M
          0.2500000000000000,
229
36.1M
          -0.1014005039375376f,
230
36.1M
          0.2929100136981264f,
231
36.1M
          0.0000000000000000,
232
36.1M
          0.0000000000000000,
233
36.1M
          -0.0643507165794627f,
234
36.1M
          0.3935103426921017f,
235
36.1M
          -0.0657870154914280f,
236
36.1M
          0.0000000000000000,
237
36.1M
          -0.4082482904638628f,
238
36.1M
          -0.3078822139579090f,
239
36.1M
          -0.3852501370925192f,
240
36.1M
          -0.0857401903551931f,
241
36.1M
          -0.4613374887461511f,
242
36.1M
          0.0000000000000000,
243
36.1M
          0.2191868483885747f,
244
36.1M
      },
245
36.1M
      {
246
36.1M
          0.2500000000000000,
247
36.1M
          -0.1014005039375376f,
248
36.1M
          -0.4067007583026072f,
249
36.1M
          -0.2125574805828705f,
250
36.1M
          0.0000000000000000,
251
36.1M
          -0.0643507165794627f,
252
36.1M
          -0.4517556589999464f,
253
36.1M
          0.3046847507248840f,
254
36.1M
          0.3017929516615503f,
255
36.1M
          -0.4082482904638635f,
256
36.1M
          -0.1747866975480813f,
257
36.1M
          0.2110560104933581f,
258
36.1M
          -0.1426608480880734f,
259
36.1M
          -0.1381354035075829f,
260
36.1M
          -0.1743760259965108f,
261
36.1M
          0.1135498731499426f,
262
36.1M
      },
263
36.1M
      {
264
36.1M
          0.2500000000000000,
265
36.1M
          -0.1014005039375377f,
266
36.1M
          -0.1957439937204287f,
267
36.1M
          -0.1621205195722833f,
268
36.1M
          0.0000000000000000,
269
36.1M
          -0.0643507165794628f,
270
36.1M
          0.0074182263792444f,
271
36.1M
          0.2904801297290076f,
272
36.1M
          0.0952002265347505f,
273
36.1M
          0.0000000000000000,
274
36.1M
          0.3675398009862011f,
275
36.1M
          -0.4921585901373891f,
276
36.1M
          0.2462710772207514f,
277
36.1M
          -0.0794670660591026f,
278
36.1M
          0.3623817333531165f,
279
36.1M
          -0.4351904965232251f,
280
36.1M
      },
281
36.1M
      {
282
36.1M
          0.2500000000000000,
283
36.1M
          -0.1014005039375375f,
284
36.1M
          0.0000000000000000,
285
36.1M
          -0.4706702258572528f,
286
36.1M
          0.0000000000000000,
287
36.1M
          -0.0643507165794627f,
288
36.1M
          0.1107416575309343f,
289
36.1M
          0.0000000000000000,
290
36.1M
          -0.1627234014286617f,
291
36.1M
          0.0000000000000000,
292
36.1M
          0.0000000000000000,
293
36.1M
          0.0000000000000000,
294
36.1M
          0.1488339922711357f,
295
36.1M
          0.4972464710953509f,
296
36.1M
          0.2921026642334879f,
297
36.1M
          0.5550443808910661f,
298
36.1M
      },
299
36.1M
      {
300
36.1M
          0.2500000000000000,
301
36.1M
          -0.1014005039375377f,
302
36.1M
          0.1137907446044809f,
303
36.1M
          -0.1464291867126764f,
304
36.1M
          0.0000000000000000,
305
36.1M
          -0.0643507165794628f,
306
36.1M
          0.0829816309488205f,
307
36.1M
          -0.2388977352334460f,
308
36.1M
          -0.3531238544981630f,
309
36.1M
          -0.4082482904638630f,
310
36.1M
          0.4826689115059883f,
311
36.1M
          0.1741941265991622f,
312
36.1M
          -0.0476868035022925f,
313
36.1M
          0.1253805944856366f,
314
36.1M
          -0.4326608024727445f,
315
36.1M
          -0.2546827712406646f,
316
36.1M
      },
317
36.1M
      {
318
36.1M
          0.2500000000000000,
319
36.1M
          -0.1014005039375377f,
320
36.1M
          -0.4444481661973438f,
321
36.1M
          0.3085497062849487f,
322
36.1M
          0.0000000000000000,
323
36.1M
          -0.0643507165794628f,
324
36.1M
          0.1585450355183970f,
325
36.1M
          -0.5112616136592012f,
326
36.1M
          0.2579236279634129f,
327
36.1M
          0.0000000000000000,
328
36.1M
          -0.0812611176717504f,
329
36.1M
          -0.1856718091610990f,
330
36.1M
          -0.3416446842253373f,
331
36.1M
          0.3302282550303805f,
332
36.1M
          0.0702790691196282f,
333
36.1M
          -0.0741750459581023f,
334
36.1M
      },
335
36.1M
      {
336
36.1M
          0.2500000000000000,
337
36.1M
          -0.1014005039375376f,
338
36.1M
          -0.2929100136981264f,
339
36.1M
          0.0000000000000000,
340
36.1M
          0.0000000000000000,
341
36.1M
          -0.0643507165794627f,
342
36.1M
          0.3935103426921022f,
343
36.1M
          0.0657870154914254f,
344
36.1M
          0.0000000000000000,
345
36.1M
          0.4082482904638634f,
346
36.1M
          0.3078822139579031f,
347
36.1M
          0.3852501370925211f,
348
36.1M
          -0.0857401903551927f,
349
36.1M
          -0.4613374887461554f,
350
36.1M
          0.0000000000000000,
351
36.1M
          0.2191868483885728f,
352
36.1M
      },
353
36.1M
      {
354
36.1M
          0.2500000000000000,
355
36.1M
          -0.1014005039375376f,
356
36.1M
          -0.1137907446044814f,
357
36.1M
          -0.1464291867126654f,
358
36.1M
          0.0000000000000000,
359
36.1M
          -0.0643507165794627f,
360
36.1M
          0.0829816309488214f,
361
36.1M
          0.2388977352334547f,
362
36.1M
          -0.3531238544981624f,
363
36.1M
          0.4082482904638630f,
364
36.1M
          -0.4826689115059858f,
365
36.1M
          -0.1741941265991621f,
366
36.1M
          -0.0476868035022928f,
367
36.1M
          0.1253805944856431f,
368
36.1M
          -0.4326608024727457f,
369
36.1M
          -0.2546827712406641f,
370
36.1M
      },
371
36.1M
      {
372
36.1M
          0.2500000000000000,
373
36.1M
          -0.1014005039375374f,
374
36.1M
          0.0000000000000000,
375
36.1M
          0.4251149611657548f,
376
36.1M
          0.0000000000000000,
377
36.1M
          -0.0643507165794626f,
378
36.1M
          -0.4517556589999480f,
379
36.1M
          0.0000000000000000,
380
36.1M
          -0.6035859033230976f,
381
36.1M
          0.0000000000000000,
382
36.1M
          0.0000000000000000,
383
36.1M
          0.0000000000000000,
384
36.1M
          -0.1426608480880724f,
385
36.1M
          -0.1381354035075845f,
386
36.1M
          0.3487520519930227f,
387
36.1M
          0.1135498731499429f,
388
36.1M
      },
389
36.1M
  };
390
391
36.1M
  const HWY_CAPPED(float, 16) d;
392
108M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
72.3M
    auto scalar = Zero(d);
394
1.22G
    for (size_t j = 0; j < 16; j++) {
395
1.15G
      auto px = Set(d, pixels[j]);
396
1.15G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.15G
      scalar = MulAdd(px, basis, scalar);
398
1.15G
    }
399
72.3M
    Store(scalar, d, coeffs + i);
400
72.3M
  }
401
36.1M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
686k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
686k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
686k
      {
102
686k
          0.2500000000000000,
103
686k
          0.8769029297991420f,
104
686k
          0.0000000000000000,
105
686k
          0.0000000000000000,
106
686k
          0.0000000000000000,
107
686k
          -0.4105377591765233f,
108
686k
          0.0000000000000000,
109
686k
          0.0000000000000000,
110
686k
          0.0000000000000000,
111
686k
          0.0000000000000000,
112
686k
          0.0000000000000000,
113
686k
          0.0000000000000000,
114
686k
          0.0000000000000000,
115
686k
          0.0000000000000000,
116
686k
          0.0000000000000000,
117
686k
          0.0000000000000000,
118
686k
      },
119
686k
      {
120
686k
          0.2500000000000000,
121
686k
          0.2206518106944235f,
122
686k
          0.0000000000000000,
123
686k
          0.0000000000000000,
124
686k
          -0.7071067811865474f,
125
686k
          0.6235485373547691f,
126
686k
          0.0000000000000000,
127
686k
          0.0000000000000000,
128
686k
          0.0000000000000000,
129
686k
          0.0000000000000000,
130
686k
          0.0000000000000000,
131
686k
          0.0000000000000000,
132
686k
          0.0000000000000000,
133
686k
          0.0000000000000000,
134
686k
          0.0000000000000000,
135
686k
          0.0000000000000000,
136
686k
      },
137
686k
      {
138
686k
          0.2500000000000000,
139
686k
          -0.1014005039375376f,
140
686k
          0.4067007583026075f,
141
686k
          -0.2125574805828875f,
142
686k
          0.0000000000000000,
143
686k
          -0.0643507165794627f,
144
686k
          -0.4517556589999482f,
145
686k
          -0.3046847507248690f,
146
686k
          0.3017929516615495f,
147
686k
          0.4082482904638627f,
148
686k
          0.1747866975480809f,
149
686k
          -0.2110560104933578f,
150
686k
          -0.1426608480880726f,
151
686k
          -0.1381354035075859f,
152
686k
          -0.1743760259965107f,
153
686k
          0.1135498731499434f,
154
686k
      },
155
686k
      {
156
686k
          0.2500000000000000,
157
686k
          -0.1014005039375375f,
158
686k
          0.4444481661973445f,
159
686k
          0.3085497062849767f,
160
686k
          0.0000000000000000f,
161
686k
          -0.0643507165794627f,
162
686k
          0.1585450355184006f,
163
686k
          0.5112616136591823f,
164
686k
          0.2579236279634118f,
165
686k
          0.0000000000000000,
166
686k
          0.0812611176717539f,
167
686k
          0.1856718091610980f,
168
686k
          -0.3416446842253372f,
169
686k
          0.3302282550303788f,
170
686k
          0.0702790691196284f,
171
686k
          -0.0741750459581035f,
172
686k
      },
173
686k
      {
174
686k
          0.2500000000000000,
175
686k
          0.2206518106944236f,
176
686k
          0.0000000000000000,
177
686k
          0.0000000000000000,
178
686k
          0.7071067811865476f,
179
686k
          0.6235485373547694f,
180
686k
          0.0000000000000000,
181
686k
          0.0000000000000000,
182
686k
          0.0000000000000000,
183
686k
          0.0000000000000000,
184
686k
          0.0000000000000000,
185
686k
          0.0000000000000000,
186
686k
          0.0000000000000000,
187
686k
          0.0000000000000000,
188
686k
          0.0000000000000000,
189
686k
          0.0000000000000000,
190
686k
      },
191
686k
      {
192
686k
          0.2500000000000000,
193
686k
          -0.1014005039375378f,
194
686k
          0.0000000000000000,
195
686k
          0.4706702258572536f,
196
686k
          0.0000000000000000,
197
686k
          -0.0643507165794628f,
198
686k
          -0.0403851516082220f,
199
686k
          0.0000000000000000,
200
686k
          0.1627234014286620f,
201
686k
          0.0000000000000000,
202
686k
          0.0000000000000000,
203
686k
          0.0000000000000000,
204
686k
          0.7367497537172237f,
205
686k
          0.0875511500058708f,
206
686k
          -0.2921026642334881f,
207
686k
          0.1940289303259434f,
208
686k
      },
209
686k
      {
210
686k
          0.2500000000000000,
211
686k
          -0.1014005039375377f,
212
686k
          0.1957439937204294f,
213
686k
          -0.1621205195722993f,
214
686k
          0.0000000000000000,
215
686k
          -0.0643507165794628f,
216
686k
          0.0074182263792424f,
217
686k
          -0.2904801297289980f,
218
686k
          0.0952002265347504f,
219
686k
          0.0000000000000000,
220
686k
          -0.3675398009862027f,
221
686k
          0.4921585901373873f,
222
686k
          0.2462710772207515f,
223
686k
          -0.0794670660590957f,
224
686k
          0.3623817333531167f,
225
686k
          -0.4351904965232280f,
226
686k
      },
227
686k
      {
228
686k
          0.2500000000000000,
229
686k
          -0.1014005039375376f,
230
686k
          0.2929100136981264f,
231
686k
          0.0000000000000000,
232
686k
          0.0000000000000000,
233
686k
          -0.0643507165794627f,
234
686k
          0.3935103426921017f,
235
686k
          -0.0657870154914280f,
236
686k
          0.0000000000000000,
237
686k
          -0.4082482904638628f,
238
686k
          -0.3078822139579090f,
239
686k
          -0.3852501370925192f,
240
686k
          -0.0857401903551931f,
241
686k
          -0.4613374887461511f,
242
686k
          0.0000000000000000,
243
686k
          0.2191868483885747f,
244
686k
      },
245
686k
      {
246
686k
          0.2500000000000000,
247
686k
          -0.1014005039375376f,
248
686k
          -0.4067007583026072f,
249
686k
          -0.2125574805828705f,
250
686k
          0.0000000000000000,
251
686k
          -0.0643507165794627f,
252
686k
          -0.4517556589999464f,
253
686k
          0.3046847507248840f,
254
686k
          0.3017929516615503f,
255
686k
          -0.4082482904638635f,
256
686k
          -0.1747866975480813f,
257
686k
          0.2110560104933581f,
258
686k
          -0.1426608480880734f,
259
686k
          -0.1381354035075829f,
260
686k
          -0.1743760259965108f,
261
686k
          0.1135498731499426f,
262
686k
      },
263
686k
      {
264
686k
          0.2500000000000000,
265
686k
          -0.1014005039375377f,
266
686k
          -0.1957439937204287f,
267
686k
          -0.1621205195722833f,
268
686k
          0.0000000000000000,
269
686k
          -0.0643507165794628f,
270
686k
          0.0074182263792444f,
271
686k
          0.2904801297290076f,
272
686k
          0.0952002265347505f,
273
686k
          0.0000000000000000,
274
686k
          0.3675398009862011f,
275
686k
          -0.4921585901373891f,
276
686k
          0.2462710772207514f,
277
686k
          -0.0794670660591026f,
278
686k
          0.3623817333531165f,
279
686k
          -0.4351904965232251f,
280
686k
      },
281
686k
      {
282
686k
          0.2500000000000000,
283
686k
          -0.1014005039375375f,
284
686k
          0.0000000000000000,
285
686k
          -0.4706702258572528f,
286
686k
          0.0000000000000000,
287
686k
          -0.0643507165794627f,
288
686k
          0.1107416575309343f,
289
686k
          0.0000000000000000,
290
686k
          -0.1627234014286617f,
291
686k
          0.0000000000000000,
292
686k
          0.0000000000000000,
293
686k
          0.0000000000000000,
294
686k
          0.1488339922711357f,
295
686k
          0.4972464710953509f,
296
686k
          0.2921026642334879f,
297
686k
          0.5550443808910661f,
298
686k
      },
299
686k
      {
300
686k
          0.2500000000000000,
301
686k
          -0.1014005039375377f,
302
686k
          0.1137907446044809f,
303
686k
          -0.1464291867126764f,
304
686k
          0.0000000000000000,
305
686k
          -0.0643507165794628f,
306
686k
          0.0829816309488205f,
307
686k
          -0.2388977352334460f,
308
686k
          -0.3531238544981630f,
309
686k
          -0.4082482904638630f,
310
686k
          0.4826689115059883f,
311
686k
          0.1741941265991622f,
312
686k
          -0.0476868035022925f,
313
686k
          0.1253805944856366f,
314
686k
          -0.4326608024727445f,
315
686k
          -0.2546827712406646f,
316
686k
      },
317
686k
      {
318
686k
          0.2500000000000000,
319
686k
          -0.1014005039375377f,
320
686k
          -0.4444481661973438f,
321
686k
          0.3085497062849487f,
322
686k
          0.0000000000000000,
323
686k
          -0.0643507165794628f,
324
686k
          0.1585450355183970f,
325
686k
          -0.5112616136592012f,
326
686k
          0.2579236279634129f,
327
686k
          0.0000000000000000,
328
686k
          -0.0812611176717504f,
329
686k
          -0.1856718091610990f,
330
686k
          -0.3416446842253373f,
331
686k
          0.3302282550303805f,
332
686k
          0.0702790691196282f,
333
686k
          -0.0741750459581023f,
334
686k
      },
335
686k
      {
336
686k
          0.2500000000000000,
337
686k
          -0.1014005039375376f,
338
686k
          -0.2929100136981264f,
339
686k
          0.0000000000000000,
340
686k
          0.0000000000000000,
341
686k
          -0.0643507165794627f,
342
686k
          0.3935103426921022f,
343
686k
          0.0657870154914254f,
344
686k
          0.0000000000000000,
345
686k
          0.4082482904638634f,
346
686k
          0.3078822139579031f,
347
686k
          0.3852501370925211f,
348
686k
          -0.0857401903551927f,
349
686k
          -0.4613374887461554f,
350
686k
          0.0000000000000000,
351
686k
          0.2191868483885728f,
352
686k
      },
353
686k
      {
354
686k
          0.2500000000000000,
355
686k
          -0.1014005039375376f,
356
686k
          -0.1137907446044814f,
357
686k
          -0.1464291867126654f,
358
686k
          0.0000000000000000,
359
686k
          -0.0643507165794627f,
360
686k
          0.0829816309488214f,
361
686k
          0.2388977352334547f,
362
686k
          -0.3531238544981624f,
363
686k
          0.4082482904638630f,
364
686k
          -0.4826689115059858f,
365
686k
          -0.1741941265991621f,
366
686k
          -0.0476868035022928f,
367
686k
          0.1253805944856431f,
368
686k
          -0.4326608024727457f,
369
686k
          -0.2546827712406641f,
370
686k
      },
371
686k
      {
372
686k
          0.2500000000000000,
373
686k
          -0.1014005039375374f,
374
686k
          0.0000000000000000,
375
686k
          0.4251149611657548f,
376
686k
          0.0000000000000000,
377
686k
          -0.0643507165794626f,
378
686k
          -0.4517556589999480f,
379
686k
          0.0000000000000000,
380
686k
          -0.6035859033230976f,
381
686k
          0.0000000000000000,
382
686k
          0.0000000000000000,
383
686k
          0.0000000000000000,
384
686k
          -0.1426608480880724f,
385
686k
          -0.1381354035075845f,
386
686k
          0.3487520519930227f,
387
686k
          0.1135498731499429f,
388
686k
      },
389
686k
  };
390
391
686k
  const HWY_CAPPED(float, 16) d;
392
2.05M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.37M
    auto scalar = Zero(d);
394
23.3M
    for (size_t j = 0; j < 16; j++) {
395
21.9M
      auto px = Set(d, pixels[j]);
396
21.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
21.9M
      scalar = MulAdd(px, basis, scalar);
398
21.9M
    }
399
1.37M
    Store(scalar, d, coeffs + i);
400
1.37M
  }
401
686k
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
34.7M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
34.7M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
34.7M
      {
102
34.7M
          0.2500000000000000,
103
34.7M
          0.8769029297991420f,
104
34.7M
          0.0000000000000000,
105
34.7M
          0.0000000000000000,
106
34.7M
          0.0000000000000000,
107
34.7M
          -0.4105377591765233f,
108
34.7M
          0.0000000000000000,
109
34.7M
          0.0000000000000000,
110
34.7M
          0.0000000000000000,
111
34.7M
          0.0000000000000000,
112
34.7M
          0.0000000000000000,
113
34.7M
          0.0000000000000000,
114
34.7M
          0.0000000000000000,
115
34.7M
          0.0000000000000000,
116
34.7M
          0.0000000000000000,
117
34.7M
          0.0000000000000000,
118
34.7M
      },
119
34.7M
      {
120
34.7M
          0.2500000000000000,
121
34.7M
          0.2206518106944235f,
122
34.7M
          0.0000000000000000,
123
34.7M
          0.0000000000000000,
124
34.7M
          -0.7071067811865474f,
125
34.7M
          0.6235485373547691f,
126
34.7M
          0.0000000000000000,
127
34.7M
          0.0000000000000000,
128
34.7M
          0.0000000000000000,
129
34.7M
          0.0000000000000000,
130
34.7M
          0.0000000000000000,
131
34.7M
          0.0000000000000000,
132
34.7M
          0.0000000000000000,
133
34.7M
          0.0000000000000000,
134
34.7M
          0.0000000000000000,
135
34.7M
          0.0000000000000000,
136
34.7M
      },
137
34.7M
      {
138
34.7M
          0.2500000000000000,
139
34.7M
          -0.1014005039375376f,
140
34.7M
          0.4067007583026075f,
141
34.7M
          -0.2125574805828875f,
142
34.7M
          0.0000000000000000,
143
34.7M
          -0.0643507165794627f,
144
34.7M
          -0.4517556589999482f,
145
34.7M
          -0.3046847507248690f,
146
34.7M
          0.3017929516615495f,
147
34.7M
          0.4082482904638627f,
148
34.7M
          0.1747866975480809f,
149
34.7M
          -0.2110560104933578f,
150
34.7M
          -0.1426608480880726f,
151
34.7M
          -0.1381354035075859f,
152
34.7M
          -0.1743760259965107f,
153
34.7M
          0.1135498731499434f,
154
34.7M
      },
155
34.7M
      {
156
34.7M
          0.2500000000000000,
157
34.7M
          -0.1014005039375375f,
158
34.7M
          0.4444481661973445f,
159
34.7M
          0.3085497062849767f,
160
34.7M
          0.0000000000000000f,
161
34.7M
          -0.0643507165794627f,
162
34.7M
          0.1585450355184006f,
163
34.7M
          0.5112616136591823f,
164
34.7M
          0.2579236279634118f,
165
34.7M
          0.0000000000000000,
166
34.7M
          0.0812611176717539f,
167
34.7M
          0.1856718091610980f,
168
34.7M
          -0.3416446842253372f,
169
34.7M
          0.3302282550303788f,
170
34.7M
          0.0702790691196284f,
171
34.7M
          -0.0741750459581035f,
172
34.7M
      },
173
34.7M
      {
174
34.7M
          0.2500000000000000,
175
34.7M
          0.2206518106944236f,
176
34.7M
          0.0000000000000000,
177
34.7M
          0.0000000000000000,
178
34.7M
          0.7071067811865476f,
179
34.7M
          0.6235485373547694f,
180
34.7M
          0.0000000000000000,
181
34.7M
          0.0000000000000000,
182
34.7M
          0.0000000000000000,
183
34.7M
          0.0000000000000000,
184
34.7M
          0.0000000000000000,
185
34.7M
          0.0000000000000000,
186
34.7M
          0.0000000000000000,
187
34.7M
          0.0000000000000000,
188
34.7M
          0.0000000000000000,
189
34.7M
          0.0000000000000000,
190
34.7M
      },
191
34.7M
      {
192
34.7M
          0.2500000000000000,
193
34.7M
          -0.1014005039375378f,
194
34.7M
          0.0000000000000000,
195
34.7M
          0.4706702258572536f,
196
34.7M
          0.0000000000000000,
197
34.7M
          -0.0643507165794628f,
198
34.7M
          -0.0403851516082220f,
199
34.7M
          0.0000000000000000,
200
34.7M
          0.1627234014286620f,
201
34.7M
          0.0000000000000000,
202
34.7M
          0.0000000000000000,
203
34.7M
          0.0000000000000000,
204
34.7M
          0.7367497537172237f,
205
34.7M
          0.0875511500058708f,
206
34.7M
          -0.2921026642334881f,
207
34.7M
          0.1940289303259434f,
208
34.7M
      },
209
34.7M
      {
210
34.7M
          0.2500000000000000,
211
34.7M
          -0.1014005039375377f,
212
34.7M
          0.1957439937204294f,
213
34.7M
          -0.1621205195722993f,
214
34.7M
          0.0000000000000000,
215
34.7M
          -0.0643507165794628f,
216
34.7M
          0.0074182263792424f,
217
34.7M
          -0.2904801297289980f,
218
34.7M
          0.0952002265347504f,
219
34.7M
          0.0000000000000000,
220
34.7M
          -0.3675398009862027f,
221
34.7M
          0.4921585901373873f,
222
34.7M
          0.2462710772207515f,
223
34.7M
          -0.0794670660590957f,
224
34.7M
          0.3623817333531167f,
225
34.7M
          -0.4351904965232280f,
226
34.7M
      },
227
34.7M
      {
228
34.7M
          0.2500000000000000,
229
34.7M
          -0.1014005039375376f,
230
34.7M
          0.2929100136981264f,
231
34.7M
          0.0000000000000000,
232
34.7M
          0.0000000000000000,
233
34.7M
          -0.0643507165794627f,
234
34.7M
          0.3935103426921017f,
235
34.7M
          -0.0657870154914280f,
236
34.7M
          0.0000000000000000,
237
34.7M
          -0.4082482904638628f,
238
34.7M
          -0.3078822139579090f,
239
34.7M
          -0.3852501370925192f,
240
34.7M
          -0.0857401903551931f,
241
34.7M
          -0.4613374887461511f,
242
34.7M
          0.0000000000000000,
243
34.7M
          0.2191868483885747f,
244
34.7M
      },
245
34.7M
      {
246
34.7M
          0.2500000000000000,
247
34.7M
          -0.1014005039375376f,
248
34.7M
          -0.4067007583026072f,
249
34.7M
          -0.2125574805828705f,
250
34.7M
          0.0000000000000000,
251
34.7M
          -0.0643507165794627f,
252
34.7M
          -0.4517556589999464f,
253
34.7M
          0.3046847507248840f,
254
34.7M
          0.3017929516615503f,
255
34.7M
          -0.4082482904638635f,
256
34.7M
          -0.1747866975480813f,
257
34.7M
          0.2110560104933581f,
258
34.7M
          -0.1426608480880734f,
259
34.7M
          -0.1381354035075829f,
260
34.7M
          -0.1743760259965108f,
261
34.7M
          0.1135498731499426f,
262
34.7M
      },
263
34.7M
      {
264
34.7M
          0.2500000000000000,
265
34.7M
          -0.1014005039375377f,
266
34.7M
          -0.1957439937204287f,
267
34.7M
          -0.1621205195722833f,
268
34.7M
          0.0000000000000000,
269
34.7M
          -0.0643507165794628f,
270
34.7M
          0.0074182263792444f,
271
34.7M
          0.2904801297290076f,
272
34.7M
          0.0952002265347505f,
273
34.7M
          0.0000000000000000,
274
34.7M
          0.3675398009862011f,
275
34.7M
          -0.4921585901373891f,
276
34.7M
          0.2462710772207514f,
277
34.7M
          -0.0794670660591026f,
278
34.7M
          0.3623817333531165f,
279
34.7M
          -0.4351904965232251f,
280
34.7M
      },
281
34.7M
      {
282
34.7M
          0.2500000000000000,
283
34.7M
          -0.1014005039375375f,
284
34.7M
          0.0000000000000000,
285
34.7M
          -0.4706702258572528f,
286
34.7M
          0.0000000000000000,
287
34.7M
          -0.0643507165794627f,
288
34.7M
          0.1107416575309343f,
289
34.7M
          0.0000000000000000,
290
34.7M
          -0.1627234014286617f,
291
34.7M
          0.0000000000000000,
292
34.7M
          0.0000000000000000,
293
34.7M
          0.0000000000000000,
294
34.7M
          0.1488339922711357f,
295
34.7M
          0.4972464710953509f,
296
34.7M
          0.2921026642334879f,
297
34.7M
          0.5550443808910661f,
298
34.7M
      },
299
34.7M
      {
300
34.7M
          0.2500000000000000,
301
34.7M
          -0.1014005039375377f,
302
34.7M
          0.1137907446044809f,
303
34.7M
          -0.1464291867126764f,
304
34.7M
          0.0000000000000000,
305
34.7M
          -0.0643507165794628f,
306
34.7M
          0.0829816309488205f,
307
34.7M
          -0.2388977352334460f,
308
34.7M
          -0.3531238544981630f,
309
34.7M
          -0.4082482904638630f,
310
34.7M
          0.4826689115059883f,
311
34.7M
          0.1741941265991622f,
312
34.7M
          -0.0476868035022925f,
313
34.7M
          0.1253805944856366f,
314
34.7M
          -0.4326608024727445f,
315
34.7M
          -0.2546827712406646f,
316
34.7M
      },
317
34.7M
      {
318
34.7M
          0.2500000000000000,
319
34.7M
          -0.1014005039375377f,
320
34.7M
          -0.4444481661973438f,
321
34.7M
          0.3085497062849487f,
322
34.7M
          0.0000000000000000,
323
34.7M
          -0.0643507165794628f,
324
34.7M
          0.1585450355183970f,
325
34.7M
          -0.5112616136592012f,
326
34.7M
          0.2579236279634129f,
327
34.7M
          0.0000000000000000,
328
34.7M
          -0.0812611176717504f,
329
34.7M
          -0.1856718091610990f,
330
34.7M
          -0.3416446842253373f,
331
34.7M
          0.3302282550303805f,
332
34.7M
          0.0702790691196282f,
333
34.7M
          -0.0741750459581023f,
334
34.7M
      },
335
34.7M
      {
336
34.7M
          0.2500000000000000,
337
34.7M
          -0.1014005039375376f,
338
34.7M
          -0.2929100136981264f,
339
34.7M
          0.0000000000000000,
340
34.7M
          0.0000000000000000,
341
34.7M
          -0.0643507165794627f,
342
34.7M
          0.3935103426921022f,
343
34.7M
          0.0657870154914254f,
344
34.7M
          0.0000000000000000,
345
34.7M
          0.4082482904638634f,
346
34.7M
          0.3078822139579031f,
347
34.7M
          0.3852501370925211f,
348
34.7M
          -0.0857401903551927f,
349
34.7M
          -0.4613374887461554f,
350
34.7M
          0.0000000000000000,
351
34.7M
          0.2191868483885728f,
352
34.7M
      },
353
34.7M
      {
354
34.7M
          0.2500000000000000,
355
34.7M
          -0.1014005039375376f,
356
34.7M
          -0.1137907446044814f,
357
34.7M
          -0.1464291867126654f,
358
34.7M
          0.0000000000000000,
359
34.7M
          -0.0643507165794627f,
360
34.7M
          0.0829816309488214f,
361
34.7M
          0.2388977352334547f,
362
34.7M
          -0.3531238544981624f,
363
34.7M
          0.4082482904638630f,
364
34.7M
          -0.4826689115059858f,
365
34.7M
          -0.1741941265991621f,
366
34.7M
          -0.0476868035022928f,
367
34.7M
          0.1253805944856431f,
368
34.7M
          -0.4326608024727457f,
369
34.7M
          -0.2546827712406641f,
370
34.7M
      },
371
34.7M
      {
372
34.7M
          0.2500000000000000,
373
34.7M
          -0.1014005039375374f,
374
34.7M
          0.0000000000000000,
375
34.7M
          0.4251149611657548f,
376
34.7M
          0.0000000000000000,
377
34.7M
          -0.0643507165794626f,
378
34.7M
          -0.4517556589999480f,
379
34.7M
          0.0000000000000000,
380
34.7M
          -0.6035859033230976f,
381
34.7M
          0.0000000000000000,
382
34.7M
          0.0000000000000000,
383
34.7M
          0.0000000000000000,
384
34.7M
          -0.1426608480880724f,
385
34.7M
          -0.1381354035075845f,
386
34.7M
          0.3487520519930227f,
387
34.7M
          0.1135498731499429f,
388
34.7M
      },
389
34.7M
  };
390
391
34.7M
  const HWY_CAPPED(float, 16) d;
392
104M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
69.5M
    auto scalar = Zero(d);
394
1.18G
    for (size_t j = 0; j < 16; j++) {
395
1.11G
      auto px = Set(d, pixels[j]);
396
1.11G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.11G
      scalar = MulAdd(px, basis, scalar);
398
1.11G
    }
399
69.5M
    Store(scalar, d, coeffs + i);
400
69.5M
  }
401
34.7M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
686k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
686k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
686k
      {
102
686k
          0.2500000000000000,
103
686k
          0.8769029297991420f,
104
686k
          0.0000000000000000,
105
686k
          0.0000000000000000,
106
686k
          0.0000000000000000,
107
686k
          -0.4105377591765233f,
108
686k
          0.0000000000000000,
109
686k
          0.0000000000000000,
110
686k
          0.0000000000000000,
111
686k
          0.0000000000000000,
112
686k
          0.0000000000000000,
113
686k
          0.0000000000000000,
114
686k
          0.0000000000000000,
115
686k
          0.0000000000000000,
116
686k
          0.0000000000000000,
117
686k
          0.0000000000000000,
118
686k
      },
119
686k
      {
120
686k
          0.2500000000000000,
121
686k
          0.2206518106944235f,
122
686k
          0.0000000000000000,
123
686k
          0.0000000000000000,
124
686k
          -0.7071067811865474f,
125
686k
          0.6235485373547691f,
126
686k
          0.0000000000000000,
127
686k
          0.0000000000000000,
128
686k
          0.0000000000000000,
129
686k
          0.0000000000000000,
130
686k
          0.0000000000000000,
131
686k
          0.0000000000000000,
132
686k
          0.0000000000000000,
133
686k
          0.0000000000000000,
134
686k
          0.0000000000000000,
135
686k
          0.0000000000000000,
136
686k
      },
137
686k
      {
138
686k
          0.2500000000000000,
139
686k
          -0.1014005039375376f,
140
686k
          0.4067007583026075f,
141
686k
          -0.2125574805828875f,
142
686k
          0.0000000000000000,
143
686k
          -0.0643507165794627f,
144
686k
          -0.4517556589999482f,
145
686k
          -0.3046847507248690f,
146
686k
          0.3017929516615495f,
147
686k
          0.4082482904638627f,
148
686k
          0.1747866975480809f,
149
686k
          -0.2110560104933578f,
150
686k
          -0.1426608480880726f,
151
686k
          -0.1381354035075859f,
152
686k
          -0.1743760259965107f,
153
686k
          0.1135498731499434f,
154
686k
      },
155
686k
      {
156
686k
          0.2500000000000000,
157
686k
          -0.1014005039375375f,
158
686k
          0.4444481661973445f,
159
686k
          0.3085497062849767f,
160
686k
          0.0000000000000000f,
161
686k
          -0.0643507165794627f,
162
686k
          0.1585450355184006f,
163
686k
          0.5112616136591823f,
164
686k
          0.2579236279634118f,
165
686k
          0.0000000000000000,
166
686k
          0.0812611176717539f,
167
686k
          0.1856718091610980f,
168
686k
          -0.3416446842253372f,
169
686k
          0.3302282550303788f,
170
686k
          0.0702790691196284f,
171
686k
          -0.0741750459581035f,
172
686k
      },
173
686k
      {
174
686k
          0.2500000000000000,
175
686k
          0.2206518106944236f,
176
686k
          0.0000000000000000,
177
686k
          0.0000000000000000,
178
686k
          0.7071067811865476f,
179
686k
          0.6235485373547694f,
180
686k
          0.0000000000000000,
181
686k
          0.0000000000000000,
182
686k
          0.0000000000000000,
183
686k
          0.0000000000000000,
184
686k
          0.0000000000000000,
185
686k
          0.0000000000000000,
186
686k
          0.0000000000000000,
187
686k
          0.0000000000000000,
188
686k
          0.0000000000000000,
189
686k
          0.0000000000000000,
190
686k
      },
191
686k
      {
192
686k
          0.2500000000000000,
193
686k
          -0.1014005039375378f,
194
686k
          0.0000000000000000,
195
686k
          0.4706702258572536f,
196
686k
          0.0000000000000000,
197
686k
          -0.0643507165794628f,
198
686k
          -0.0403851516082220f,
199
686k
          0.0000000000000000,
200
686k
          0.1627234014286620f,
201
686k
          0.0000000000000000,
202
686k
          0.0000000000000000,
203
686k
          0.0000000000000000,
204
686k
          0.7367497537172237f,
205
686k
          0.0875511500058708f,
206
686k
          -0.2921026642334881f,
207
686k
          0.1940289303259434f,
208
686k
      },
209
686k
      {
210
686k
          0.2500000000000000,
211
686k
          -0.1014005039375377f,
212
686k
          0.1957439937204294f,
213
686k
          -0.1621205195722993f,
214
686k
          0.0000000000000000,
215
686k
          -0.0643507165794628f,
216
686k
          0.0074182263792424f,
217
686k
          -0.2904801297289980f,
218
686k
          0.0952002265347504f,
219
686k
          0.0000000000000000,
220
686k
          -0.3675398009862027f,
221
686k
          0.4921585901373873f,
222
686k
          0.2462710772207515f,
223
686k
          -0.0794670660590957f,
224
686k
          0.3623817333531167f,
225
686k
          -0.4351904965232280f,
226
686k
      },
227
686k
      {
228
686k
          0.2500000000000000,
229
686k
          -0.1014005039375376f,
230
686k
          0.2929100136981264f,
231
686k
          0.0000000000000000,
232
686k
          0.0000000000000000,
233
686k
          -0.0643507165794627f,
234
686k
          0.3935103426921017f,
235
686k
          -0.0657870154914280f,
236
686k
          0.0000000000000000,
237
686k
          -0.4082482904638628f,
238
686k
          -0.3078822139579090f,
239
686k
          -0.3852501370925192f,
240
686k
          -0.0857401903551931f,
241
686k
          -0.4613374887461511f,
242
686k
          0.0000000000000000,
243
686k
          0.2191868483885747f,
244
686k
      },
245
686k
      {
246
686k
          0.2500000000000000,
247
686k
          -0.1014005039375376f,
248
686k
          -0.4067007583026072f,
249
686k
          -0.2125574805828705f,
250
686k
          0.0000000000000000,
251
686k
          -0.0643507165794627f,
252
686k
          -0.4517556589999464f,
253
686k
          0.3046847507248840f,
254
686k
          0.3017929516615503f,
255
686k
          -0.4082482904638635f,
256
686k
          -0.1747866975480813f,
257
686k
          0.2110560104933581f,
258
686k
          -0.1426608480880734f,
259
686k
          -0.1381354035075829f,
260
686k
          -0.1743760259965108f,
261
686k
          0.1135498731499426f,
262
686k
      },
263
686k
      {
264
686k
          0.2500000000000000,
265
686k
          -0.1014005039375377f,
266
686k
          -0.1957439937204287f,
267
686k
          -0.1621205195722833f,
268
686k
          0.0000000000000000,
269
686k
          -0.0643507165794628f,
270
686k
          0.0074182263792444f,
271
686k
          0.2904801297290076f,
272
686k
          0.0952002265347505f,
273
686k
          0.0000000000000000,
274
686k
          0.3675398009862011f,
275
686k
          -0.4921585901373891f,
276
686k
          0.2462710772207514f,
277
686k
          -0.0794670660591026f,
278
686k
          0.3623817333531165f,
279
686k
          -0.4351904965232251f,
280
686k
      },
281
686k
      {
282
686k
          0.2500000000000000,
283
686k
          -0.1014005039375375f,
284
686k
          0.0000000000000000,
285
686k
          -0.4706702258572528f,
286
686k
          0.0000000000000000,
287
686k
          -0.0643507165794627f,
288
686k
          0.1107416575309343f,
289
686k
          0.0000000000000000,
290
686k
          -0.1627234014286617f,
291
686k
          0.0000000000000000,
292
686k
          0.0000000000000000,
293
686k
          0.0000000000000000,
294
686k
          0.1488339922711357f,
295
686k
          0.4972464710953509f,
296
686k
          0.2921026642334879f,
297
686k
          0.5550443808910661f,
298
686k
      },
299
686k
      {
300
686k
          0.2500000000000000,
301
686k
          -0.1014005039375377f,
302
686k
          0.1137907446044809f,
303
686k
          -0.1464291867126764f,
304
686k
          0.0000000000000000,
305
686k
          -0.0643507165794628f,
306
686k
          0.0829816309488205f,
307
686k
          -0.2388977352334460f,
308
686k
          -0.3531238544981630f,
309
686k
          -0.4082482904638630f,
310
686k
          0.4826689115059883f,
311
686k
          0.1741941265991622f,
312
686k
          -0.0476868035022925f,
313
686k
          0.1253805944856366f,
314
686k
          -0.4326608024727445f,
315
686k
          -0.2546827712406646f,
316
686k
      },
317
686k
      {
318
686k
          0.2500000000000000,
319
686k
          -0.1014005039375377f,
320
686k
          -0.4444481661973438f,
321
686k
          0.3085497062849487f,
322
686k
          0.0000000000000000,
323
686k
          -0.0643507165794628f,
324
686k
          0.1585450355183970f,
325
686k
          -0.5112616136592012f,
326
686k
          0.2579236279634129f,
327
686k
          0.0000000000000000,
328
686k
          -0.0812611176717504f,
329
686k
          -0.1856718091610990f,
330
686k
          -0.3416446842253373f,
331
686k
          0.3302282550303805f,
332
686k
          0.0702790691196282f,
333
686k
          -0.0741750459581023f,
334
686k
      },
335
686k
      {
336
686k
          0.2500000000000000,
337
686k
          -0.1014005039375376f,
338
686k
          -0.2929100136981264f,
339
686k
          0.0000000000000000,
340
686k
          0.0000000000000000,
341
686k
          -0.0643507165794627f,
342
686k
          0.3935103426921022f,
343
686k
          0.0657870154914254f,
344
686k
          0.0000000000000000,
345
686k
          0.4082482904638634f,
346
686k
          0.3078822139579031f,
347
686k
          0.3852501370925211f,
348
686k
          -0.0857401903551927f,
349
686k
          -0.4613374887461554f,
350
686k
          0.0000000000000000,
351
686k
          0.2191868483885728f,
352
686k
      },
353
686k
      {
354
686k
          0.2500000000000000,
355
686k
          -0.1014005039375376f,
356
686k
          -0.1137907446044814f,
357
686k
          -0.1464291867126654f,
358
686k
          0.0000000000000000,
359
686k
          -0.0643507165794627f,
360
686k
          0.0829816309488214f,
361
686k
          0.2388977352334547f,
362
686k
          -0.3531238544981624f,
363
686k
          0.4082482904638630f,
364
686k
          -0.4826689115059858f,
365
686k
          -0.1741941265991621f,
366
686k
          -0.0476868035022928f,
367
686k
          0.1253805944856431f,
368
686k
          -0.4326608024727457f,
369
686k
          -0.2546827712406641f,
370
686k
      },
371
686k
      {
372
686k
          0.2500000000000000,
373
686k
          -0.1014005039375374f,
374
686k
          0.0000000000000000,
375
686k
          0.4251149611657548f,
376
686k
          0.0000000000000000,
377
686k
          -0.0643507165794626f,
378
686k
          -0.4517556589999480f,
379
686k
          0.0000000000000000,
380
686k
          -0.6035859033230976f,
381
686k
          0.0000000000000000,
382
686k
          0.0000000000000000,
383
686k
          0.0000000000000000,
384
686k
          -0.1426608480880724f,
385
686k
          -0.1381354035075845f,
386
686k
          0.3487520519930227f,
387
686k
          0.1135498731499429f,
388
686k
      },
389
686k
  };
390
391
686k
  const HWY_CAPPED(float, 16) d;
392
2.05M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.37M
    auto scalar = Zero(d);
394
23.3M
    for (size_t j = 0; j < 16; j++) {
395
21.9M
      auto px = Set(d, pixels[j]);
396
21.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
21.9M
      scalar = MulAdd(px, basis, scalar);
398
21.9M
    }
399
1.37M
    Store(scalar, d, coeffs + i);
400
1.37M
  }
401
686k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
36.1M
                            float* JXL_RESTRICT coefficients) {
411
36.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
36.1M
  size_t afv_x = afv_kind & 1;
413
36.1M
  size_t afv_y = afv_kind / 2;
414
36.1M
  HWY_ALIGN float block[4 * 8] = {};
415
180M
  for (size_t iy = 0; iy < 4; iy++) {
416
723M
    for (size_t ix = 0; ix < 4; ix++) {
417
578M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
578M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
578M
    }
420
144M
  }
421
  // AFV coefficients in (even, even) positions.
422
36.1M
  HWY_ALIGN float coeff[4 * 4];
423
36.1M
  AFVDCT4x4(block, coeff);
424
180M
  for (size_t iy = 0; iy < 4; iy++) {
425
723M
    for (size_t ix = 0; ix < 4; ix++) {
426
578M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
578M
    }
428
144M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
36.1M
  ComputeScaledDCT<4, 4>()(
431
36.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
36.1M
              pixels_stride),
433
36.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
180M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.30G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.15G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.15G
    }
439
144M
  }
440
  // 4x8 DCT of the other half of the block.
441
36.1M
  ComputeScaledDCT<4, 8>()(
442
36.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
36.1M
      block, scratch_space);
444
180M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.30G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.15G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.15G
    }
448
144M
  }
449
36.1M
  float block00 = coefficients[0] * 0.25f;
450
36.1M
  float block01 = coefficients[1];
451
36.1M
  float block10 = coefficients[8];
452
36.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
36.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
36.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
36.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
178k
                            float* JXL_RESTRICT coefficients) {
411
178k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
178k
  size_t afv_x = afv_kind & 1;
413
178k
  size_t afv_y = afv_kind / 2;
414
178k
  HWY_ALIGN float block[4 * 8] = {};
415
894k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.57M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.86M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.86M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.86M
    }
420
715k
  }
421
  // AFV coefficients in (even, even) positions.
422
178k
  HWY_ALIGN float coeff[4 * 4];
423
178k
  AFVDCT4x4(block, coeff);
424
894k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.57M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.86M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.86M
    }
428
715k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
178k
  ComputeScaledDCT<4, 4>()(
431
178k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
178k
              pixels_stride),
433
178k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
894k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.44M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.72M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.72M
    }
439
715k
  }
440
  // 4x8 DCT of the other half of the block.
441
178k
  ComputeScaledDCT<4, 8>()(
442
178k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
178k
      block, scratch_space);
444
894k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.44M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.72M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.72M
    }
448
715k
  }
449
178k
  float block00 = coefficients[0] * 0.25f;
450
178k
  float block01 = coefficients[1];
451
178k
  float block10 = coefficients[8];
452
178k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
178k
  coefficients[1] = (block00 - block01) * 0.5f;
454
178k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
178k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
142k
                            float* JXL_RESTRICT coefficients) {
411
142k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
142k
  size_t afv_x = afv_kind & 1;
413
142k
  size_t afv_y = afv_kind / 2;
414
142k
  HWY_ALIGN float block[4 * 8] = {};
415
714k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.85M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.28M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.28M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.28M
    }
420
571k
  }
421
  // AFV coefficients in (even, even) positions.
422
142k
  HWY_ALIGN float coeff[4 * 4];
423
142k
  AFVDCT4x4(block, coeff);
424
714k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.85M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.28M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.28M
    }
428
571k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
142k
  ComputeScaledDCT<4, 4>()(
431
142k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
142k
              pixels_stride),
433
142k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
714k
  for (size_t iy = 0; iy < 4; iy++) {
436
5.14M
    for (size_t ix = 0; ix < 8; ix++) {
437
4.57M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
4.57M
    }
439
571k
  }
440
  // 4x8 DCT of the other half of the block.
441
142k
  ComputeScaledDCT<4, 8>()(
442
142k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
142k
      block, scratch_space);
444
714k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.14M
    for (size_t ix = 0; ix < 8; ix++) {
446
4.57M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
4.57M
    }
448
571k
  }
449
142k
  float block00 = coefficients[0] * 0.25f;
450
142k
  float block01 = coefficients[1];
451
142k
  float block10 = coefficients[8];
452
142k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
142k
  coefficients[1] = (block00 - block01) * 0.5f;
454
142k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
142k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
170k
                            float* JXL_RESTRICT coefficients) {
411
170k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
170k
  size_t afv_x = afv_kind & 1;
413
170k
  size_t afv_y = afv_kind / 2;
414
170k
  HWY_ALIGN float block[4 * 8] = {};
415
852k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.40M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.72M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.72M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.72M
    }
420
681k
  }
421
  // AFV coefficients in (even, even) positions.
422
170k
  HWY_ALIGN float coeff[4 * 4];
423
170k
  AFVDCT4x4(block, coeff);
424
852k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.40M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.72M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.72M
    }
428
681k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
170k
  ComputeScaledDCT<4, 4>()(
431
170k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
170k
              pixels_stride),
433
170k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
852k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.13M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.45M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.45M
    }
439
681k
  }
440
  // 4x8 DCT of the other half of the block.
441
170k
  ComputeScaledDCT<4, 8>()(
442
170k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
170k
      block, scratch_space);
444
852k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.13M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.45M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.45M
    }
448
681k
  }
449
170k
  float block00 = coefficients[0] * 0.25f;
450
170k
  float block01 = coefficients[1];
451
170k
  float block10 = coefficients[8];
452
170k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
170k
  coefficients[1] = (block00 - block01) * 0.5f;
454
170k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
170k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
193k
                            float* JXL_RESTRICT coefficients) {
411
193k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
193k
  size_t afv_x = afv_kind & 1;
413
193k
  size_t afv_y = afv_kind / 2;
414
193k
  HWY_ALIGN float block[4 * 8] = {};
415
969k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.10M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.10M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.10M
    }
420
775k
  }
421
  // AFV coefficients in (even, even) positions.
422
193k
  HWY_ALIGN float coeff[4 * 4];
423
193k
  AFVDCT4x4(block, coeff);
424
969k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.10M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.10M
    }
428
775k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
193k
  ComputeScaledDCT<4, 4>()(
431
193k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
193k
              pixels_stride),
433
193k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
969k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.98M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.20M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.20M
    }
439
775k
  }
440
  // 4x8 DCT of the other half of the block.
441
193k
  ComputeScaledDCT<4, 8>()(
442
193k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
193k
      block, scratch_space);
444
969k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.98M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.20M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.20M
    }
448
775k
  }
449
193k
  float block00 = coefficients[0] * 0.25f;
450
193k
  float block01 = coefficients[1];
451
193k
  float block10 = coefficients[8];
452
193k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
193k
  coefficients[1] = (block00 - block01) * 0.5f;
454
193k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
193k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.69M
                            float* JXL_RESTRICT coefficients) {
411
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.69M
  size_t afv_x = afv_kind & 1;
413
8.69M
  size_t afv_y = afv_kind / 2;
414
8.69M
  HWY_ALIGN float block[4 * 8] = {};
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
139M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
139M
    }
420
34.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
8.69M
  HWY_ALIGN float coeff[4 * 4];
423
8.69M
  AFVDCT4x4(block, coeff);
424
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
173M
    for (size_t ix = 0; ix < 4; ix++) {
426
139M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
139M
    }
428
34.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.69M
  ComputeScaledDCT<4, 4>()(
431
8.69M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.69M
              pixels_stride),
433
8.69M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
313M
    for (size_t ix = 0; ix < 8; ix++) {
437
278M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
278M
    }
439
34.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
8.69M
  ComputeScaledDCT<4, 8>()(
442
8.69M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.69M
      block, scratch_space);
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
278M
    }
448
34.7M
  }
449
8.69M
  float block00 = coefficients[0] * 0.25f;
450
8.69M
  float block01 = coefficients[1];
451
8.69M
  float block10 = coefficients[8];
452
8.69M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.69M
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.69M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.69M
                            float* JXL_RESTRICT coefficients) {
411
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.69M
  size_t afv_x = afv_kind & 1;
413
8.69M
  size_t afv_y = afv_kind / 2;
414
8.69M
  HWY_ALIGN float block[4 * 8] = {};
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
139M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
139M
    }
420
34.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
8.69M
  HWY_ALIGN float coeff[4 * 4];
423
8.69M
  AFVDCT4x4(block, coeff);
424
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
173M
    for (size_t ix = 0; ix < 4; ix++) {
426
139M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
139M
    }
428
34.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.69M
  ComputeScaledDCT<4, 4>()(
431
8.69M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.69M
              pixels_stride),
433
8.69M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
313M
    for (size_t ix = 0; ix < 8; ix++) {
437
278M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
278M
    }
439
34.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
8.69M
  ComputeScaledDCT<4, 8>()(
442
8.69M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.69M
      block, scratch_space);
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
278M
    }
448
34.7M
  }
449
8.69M
  float block00 = coefficients[0] * 0.25f;
450
8.69M
  float block01 = coefficients[1];
451
8.69M
  float block10 = coefficients[8];
452
8.69M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.69M
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.69M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.69M
                            float* JXL_RESTRICT coefficients) {
411
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.69M
  size_t afv_x = afv_kind & 1;
413
8.69M
  size_t afv_y = afv_kind / 2;
414
8.69M
  HWY_ALIGN float block[4 * 8] = {};
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
139M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
139M
    }
420
34.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
8.69M
  HWY_ALIGN float coeff[4 * 4];
423
8.69M
  AFVDCT4x4(block, coeff);
424
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
173M
    for (size_t ix = 0; ix < 4; ix++) {
426
139M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
139M
    }
428
34.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.69M
  ComputeScaledDCT<4, 4>()(
431
8.69M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.69M
              pixels_stride),
433
8.69M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
313M
    for (size_t ix = 0; ix < 8; ix++) {
437
278M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
278M
    }
439
34.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
8.69M
  ComputeScaledDCT<4, 8>()(
442
8.69M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.69M
      block, scratch_space);
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
278M
    }
448
34.7M
  }
449
8.69M
  float block00 = coefficients[0] * 0.25f;
450
8.69M
  float block01 = coefficients[1];
451
8.69M
  float block10 = coefficients[8];
452
8.69M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.69M
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.69M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.69M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
8.69M
                            float* JXL_RESTRICT coefficients) {
411
8.69M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
8.69M
  size_t afv_x = afv_kind & 1;
413
8.69M
  size_t afv_y = afv_kind / 2;
414
8.69M
  HWY_ALIGN float block[4 * 8] = {};
415
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
173M
    for (size_t ix = 0; ix < 4; ix++) {
417
139M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
139M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
139M
    }
420
34.7M
  }
421
  // AFV coefficients in (even, even) positions.
422
8.69M
  HWY_ALIGN float coeff[4 * 4];
423
8.69M
  AFVDCT4x4(block, coeff);
424
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
173M
    for (size_t ix = 0; ix < 4; ix++) {
426
139M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
139M
    }
428
34.7M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
8.69M
  ComputeScaledDCT<4, 4>()(
431
8.69M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
8.69M
              pixels_stride),
433
8.69M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
313M
    for (size_t ix = 0; ix < 8; ix++) {
437
278M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
278M
    }
439
34.7M
  }
440
  // 4x8 DCT of the other half of the block.
441
8.69M
  ComputeScaledDCT<4, 8>()(
442
8.69M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
8.69M
      block, scratch_space);
444
43.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
313M
    for (size_t ix = 0; ix < 8; ix++) {
446
278M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
278M
    }
448
34.7M
  }
449
8.69M
  float block00 = coefficients[0] * 0.25f;
450
8.69M
  float block01 = coefficients[1];
451
8.69M
  float block10 = coefficients[8];
452
8.69M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
8.69M
  coefficients[1] = (block00 - block01) * 0.5f;
454
8.69M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
8.69M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
178k
                            float* JXL_RESTRICT coefficients) {
411
178k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
178k
  size_t afv_x = afv_kind & 1;
413
178k
  size_t afv_y = afv_kind / 2;
414
178k
  HWY_ALIGN float block[4 * 8] = {};
415
894k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.57M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.86M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.86M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.86M
    }
420
715k
  }
421
  // AFV coefficients in (even, even) positions.
422
178k
  HWY_ALIGN float coeff[4 * 4];
423
178k
  AFVDCT4x4(block, coeff);
424
894k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.57M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.86M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.86M
    }
428
715k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
178k
  ComputeScaledDCT<4, 4>()(
431
178k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
178k
              pixels_stride),
433
178k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
894k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.44M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.72M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.72M
    }
439
715k
  }
440
  // 4x8 DCT of the other half of the block.
441
178k
  ComputeScaledDCT<4, 8>()(
442
178k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
178k
      block, scratch_space);
444
894k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.44M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.72M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.72M
    }
448
715k
  }
449
178k
  float block00 = coefficients[0] * 0.25f;
450
178k
  float block01 = coefficients[1];
451
178k
  float block10 = coefficients[8];
452
178k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
178k
  coefficients[1] = (block00 - block01) * 0.5f;
454
178k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
178k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
142k
                            float* JXL_RESTRICT coefficients) {
411
142k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
142k
  size_t afv_x = afv_kind & 1;
413
142k
  size_t afv_y = afv_kind / 2;
414
142k
  HWY_ALIGN float block[4 * 8] = {};
415
714k
  for (size_t iy = 0; iy < 4; iy++) {
416
2.85M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.28M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.28M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.28M
    }
420
571k
  }
421
  // AFV coefficients in (even, even) positions.
422
142k
  HWY_ALIGN float coeff[4 * 4];
423
142k
  AFVDCT4x4(block, coeff);
424
714k
  for (size_t iy = 0; iy < 4; iy++) {
425
2.85M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.28M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.28M
    }
428
571k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
142k
  ComputeScaledDCT<4, 4>()(
431
142k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
142k
              pixels_stride),
433
142k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
714k
  for (size_t iy = 0; iy < 4; iy++) {
436
5.14M
    for (size_t ix = 0; ix < 8; ix++) {
437
4.57M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
4.57M
    }
439
571k
  }
440
  // 4x8 DCT of the other half of the block.
441
142k
  ComputeScaledDCT<4, 8>()(
442
142k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
142k
      block, scratch_space);
444
714k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.14M
    for (size_t ix = 0; ix < 8; ix++) {
446
4.57M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
4.57M
    }
448
571k
  }
449
142k
  float block00 = coefficients[0] * 0.25f;
450
142k
  float block01 = coefficients[1];
451
142k
  float block10 = coefficients[8];
452
142k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
142k
  coefficients[1] = (block00 - block01) * 0.5f;
454
142k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
142k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
170k
                            float* JXL_RESTRICT coefficients) {
411
170k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
170k
  size_t afv_x = afv_kind & 1;
413
170k
  size_t afv_y = afv_kind / 2;
414
170k
  HWY_ALIGN float block[4 * 8] = {};
415
852k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.40M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.72M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.72M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.72M
    }
420
681k
  }
421
  // AFV coefficients in (even, even) positions.
422
170k
  HWY_ALIGN float coeff[4 * 4];
423
170k
  AFVDCT4x4(block, coeff);
424
852k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.40M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.72M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.72M
    }
428
681k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
170k
  ComputeScaledDCT<4, 4>()(
431
170k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
170k
              pixels_stride),
433
170k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
852k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.13M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.45M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.45M
    }
439
681k
  }
440
  // 4x8 DCT of the other half of the block.
441
170k
  ComputeScaledDCT<4, 8>()(
442
170k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
170k
      block, scratch_space);
444
852k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.13M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.45M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.45M
    }
448
681k
  }
449
170k
  float block00 = coefficients[0] * 0.25f;
450
170k
  float block01 = coefficients[1];
451
170k
  float block10 = coefficients[8];
452
170k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
170k
  coefficients[1] = (block00 - block01) * 0.5f;
454
170k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
170k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
193k
                            float* JXL_RESTRICT coefficients) {
411
193k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
193k
  size_t afv_x = afv_kind & 1;
413
193k
  size_t afv_y = afv_kind / 2;
414
193k
  HWY_ALIGN float block[4 * 8] = {};
415
969k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.10M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.10M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.10M
    }
420
775k
  }
421
  // AFV coefficients in (even, even) positions.
422
193k
  HWY_ALIGN float coeff[4 * 4];
423
193k
  AFVDCT4x4(block, coeff);
424
969k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.87M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.10M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.10M
    }
428
775k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
193k
  ComputeScaledDCT<4, 4>()(
431
193k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
193k
              pixels_stride),
433
193k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
969k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.98M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.20M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.20M
    }
439
775k
  }
440
  // 4x8 DCT of the other half of the block.
441
193k
  ComputeScaledDCT<4, 8>()(
442
193k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
193k
      block, scratch_space);
444
969k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.98M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.20M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.20M
    }
448
775k
  }
449
193k
  float block00 = coefficients[0] * 0.25f;
450
193k
  float block01 = coefficients[1];
451
193k
  float block10 = coefficients[8];
452
193k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
193k
  coefficients[1] = (block00 - block01) * 0.5f;
454
193k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
193k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
127M
                                          float* JXL_RESTRICT scratch_space) {
462
127M
  using Type = AcStrategyType;
463
127M
  switch (strategy) {
464
10.2M
    case Type::IDENTITY: {
465
30.7M
      for (size_t y = 0; y < 2; y++) {
466
61.5M
        for (size_t x = 0; x < 2; x++) {
467
41.0M
          float block_dc = 0;
468
205M
          for (size_t iy = 0; iy < 4; iy++) {
469
820M
            for (size_t ix = 0; ix < 4; ix++) {
470
656M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
656M
            }
472
164M
          }
473
41.0M
          block_dc *= 1.0f / 16;
474
205M
          for (size_t iy = 0; iy < 4; iy++) {
475
820M
            for (size_t ix = 0; ix < 4; ix++) {
476
656M
              if (ix == 1 && iy == 1) continue;
477
615M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
615M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
615M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
615M
            }
481
164M
          }
482
41.0M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
41.0M
          coefficients[y * 8 + x] = block_dc;
484
41.0M
        }
485
20.5M
      }
486
10.2M
      float block00 = coefficients[0];
487
10.2M
      float block01 = coefficients[1];
488
10.2M
      float block10 = coefficients[8];
489
10.2M
      float block11 = coefficients[9];
490
10.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
10.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
10.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
10.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
10.2M
      break;
495
0
    }
496
9.18M
    case Type::DCT8X4: {
497
27.5M
      for (size_t x = 0; x < 2; x++) {
498
18.3M
        HWY_ALIGN float block[4 * 8];
499
18.3M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
18.3M
                                 scratch_space);
501
91.8M
        for (size_t iy = 0; iy < 4; iy++) {
502
661M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
587M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
587M
          }
506
73.4M
        }
507
18.3M
      }
508
9.18M
      float block0 = coefficients[0];
509
9.18M
      float block1 = coefficients[8];
510
9.18M
      coefficients[0] = (block0 + block1) * 0.5f;
511
9.18M
      coefficients[8] = (block0 - block1) * 0.5f;
512
9.18M
      break;
513
0
    }
514
8.92M
    case Type::DCT4X8: {
515
26.7M
      for (size_t y = 0; y < 2; y++) {
516
17.8M
        HWY_ALIGN float block[4 * 8];
517
17.8M
        ComputeScaledDCT<4, 8>()(
518
17.8M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
17.8M
            scratch_space);
520
89.2M
        for (size_t iy = 0; iy < 4; iy++) {
521
642M
          for (size_t ix = 0; ix < 8; ix++) {
522
571M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
571M
          }
524
71.3M
        }
525
17.8M
      }
526
8.92M
      float block0 = coefficients[0];
527
8.92M
      float block1 = coefficients[8];
528
8.92M
      coefficients[0] = (block0 + block1) * 0.5f;
529
8.92M
      coefficients[8] = (block0 - block1) * 0.5f;
530
8.92M
      break;
531
0
    }
532
8.70M
    case Type::DCT4X4: {
533
26.1M
      for (size_t y = 0; y < 2; y++) {
534
52.2M
        for (size_t x = 0; x < 2; x++) {
535
34.8M
          HWY_ALIGN float block[4 * 4];
536
34.8M
          ComputeScaledDCT<4, 4>()(
537
34.8M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
34.8M
              block, scratch_space);
539
174M
          for (size_t iy = 0; iy < 4; iy++) {
540
696M
            for (size_t ix = 0; ix < 4; ix++) {
541
556M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
556M
            }
543
139M
          }
544
34.8M
        }
545
17.4M
      }
546
8.70M
      float block00 = coefficients[0];
547
8.70M
      float block01 = coefficients[1];
548
8.70M
      float block10 = coefficients[8];
549
8.70M
      float block11 = coefficients[9];
550
8.70M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
8.70M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
8.70M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
8.70M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
8.70M
      break;
555
0
    }
556
10.3M
    case Type::DCT2X2: {
557
10.3M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
10.3M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
10.3M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
10.3M
      break;
561
0
    }
562
3.97M
    case Type::DCT16X16: {
563
3.97M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
3.97M
                                 scratch_space);
565
3.97M
      break;
566
0
    }
567
7.62M
    case Type::DCT16X8: {
568
7.62M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
7.62M
                                scratch_space);
570
7.62M
      break;
571
0
    }
572
7.73M
    case Type::DCT8X16: {
573
7.73M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
7.73M
                                scratch_space);
575
7.73M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.53M
    case Type::DCT32X16: {
588
1.53M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.53M
                                 scratch_space);
590
1.53M
      break;
591
0
    }
592
1.59M
    case Type::DCT16X32: {
593
1.59M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.59M
                                 scratch_space);
595
1.59M
      break;
596
0
    }
597
828k
    case Type::DCT32X32: {
598
828k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
828k
                                 scratch_space);
600
828k
      break;
601
0
    }
602
19.9M
    case Type::DCT: {
603
19.9M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
19.9M
                               scratch_space);
605
19.9M
      break;
606
0
    }
607
9.05M
    case Type::AFV0: {
608
9.05M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
9.05M
      break;
610
0
    }
611
8.98M
    case Type::AFV1: {
612
8.98M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
8.98M
      break;
614
0
    }
615
9.03M
    case Type::AFV2: {
616
9.03M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
9.03M
      break;
618
0
    }
619
9.08M
    case Type::AFV3: {
620
9.08M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
9.08M
      break;
622
0
    }
623
161k
    case Type::DCT64X64: {
624
161k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
161k
                                 scratch_space);
626
161k
      break;
627
0
    }
628
462k
    case Type::DCT64X32: {
629
462k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
462k
                                 scratch_space);
631
462k
      break;
632
0
    }
633
323k
    case Type::DCT32X64: {
634
323k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
323k
                                 scratch_space);
636
323k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
127M
  }
669
127M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
4.77M
                                          float* JXL_RESTRICT scratch_space) {
462
4.77M
  using Type = AcStrategyType;
463
4.77M
  switch (strategy) {
464
781k
    case Type::IDENTITY: {
465
2.34M
      for (size_t y = 0; y < 2; y++) {
466
4.68M
        for (size_t x = 0; x < 2; x++) {
467
3.12M
          float block_dc = 0;
468
15.6M
          for (size_t iy = 0; iy < 4; iy++) {
469
62.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
49.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
49.9M
            }
472
12.4M
          }
473
3.12M
          block_dc *= 1.0f / 16;
474
15.6M
          for (size_t iy = 0; iy < 4; iy++) {
475
62.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
49.9M
              if (ix == 1 && iy == 1) continue;
477
46.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
46.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
46.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
46.8M
            }
481
12.4M
          }
482
3.12M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.12M
          coefficients[y * 8 + x] = block_dc;
484
3.12M
        }
485
1.56M
      }
486
781k
      float block00 = coefficients[0];
487
781k
      float block01 = coefficients[1];
488
781k
      float block10 = coefficients[8];
489
781k
      float block11 = coefficients[9];
490
781k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
781k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
781k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
781k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
781k
      break;
495
0
    }
496
243k
    case Type::DCT8X4: {
497
729k
      for (size_t x = 0; x < 2; x++) {
498
486k
        HWY_ALIGN float block[4 * 8];
499
486k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
486k
                                 scratch_space);
501
2.43M
        for (size_t iy = 0; iy < 4; iy++) {
502
17.5M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
15.5M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
15.5M
          }
506
1.94M
        }
507
486k
      }
508
243k
      float block0 = coefficients[0];
509
243k
      float block1 = coefficients[8];
510
243k
      coefficients[0] = (block0 + block1) * 0.5f;
511
243k
      coefficients[8] = (block0 - block1) * 0.5f;
512
243k
      break;
513
0
    }
514
111k
    case Type::DCT4X8: {
515
335k
      for (size_t y = 0; y < 2; y++) {
516
223k
        HWY_ALIGN float block[4 * 8];
517
223k
        ComputeScaledDCT<4, 8>()(
518
223k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
223k
            scratch_space);
520
1.11M
        for (size_t iy = 0; iy < 4; iy++) {
521
8.04M
          for (size_t ix = 0; ix < 8; ix++) {
522
7.15M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
7.15M
          }
524
894k
        }
525
223k
      }
526
111k
      float block0 = coefficients[0];
527
111k
      float block1 = coefficients[8];
528
111k
      coefficients[0] = (block0 + block1) * 0.5f;
529
111k
      coefficients[8] = (block0 - block1) * 0.5f;
530
111k
      break;
531
0
    }
532
1.12k
    case Type::DCT4X4: {
533
3.38k
      for (size_t y = 0; y < 2; y++) {
534
6.76k
        for (size_t x = 0; x < 2; x++) {
535
4.51k
          HWY_ALIGN float block[4 * 4];
536
4.51k
          ComputeScaledDCT<4, 4>()(
537
4.51k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.51k
              block, scratch_space);
539
22.5k
          for (size_t iy = 0; iy < 4; iy++) {
540
90.2k
            for (size_t ix = 0; ix < 4; ix++) {
541
72.1k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
72.1k
            }
543
18.0k
          }
544
4.51k
        }
545
2.25k
      }
546
1.12k
      float block00 = coefficients[0];
547
1.12k
      float block01 = coefficients[1];
548
1.12k
      float block10 = coefficients[8];
549
1.12k
      float block11 = coefficients[9];
550
1.12k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.12k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.12k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.12k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.12k
      break;
555
0
    }
556
826k
    case Type::DCT2X2: {
557
826k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
826k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
826k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
826k
      break;
561
0
    }
562
145k
    case Type::DCT16X16: {
563
145k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
145k
                                 scratch_space);
565
145k
      break;
566
0
    }
567
236k
    case Type::DCT16X8: {
568
236k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
236k
                                scratch_space);
570
236k
      break;
571
0
    }
572
279k
    case Type::DCT8X16: {
573
279k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
279k
                                scratch_space);
575
279k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
41.0k
    case Type::DCT32X16: {
588
41.0k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
41.0k
                                 scratch_space);
590
41.0k
      break;
591
0
    }
592
70.2k
    case Type::DCT16X32: {
593
70.2k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
70.2k
                                 scratch_space);
595
70.2k
      break;
596
0
    }
597
38.9k
    case Type::DCT32X32: {
598
38.9k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
38.9k
                                 scratch_space);
600
38.9k
      break;
601
0
    }
602
1.28M
    case Type::DCT: {
603
1.28M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.28M
                               scratch_space);
605
1.28M
      break;
606
0
    }
607
178k
    case Type::AFV0: {
608
178k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
178k
      break;
610
0
    }
611
142k
    case Type::AFV1: {
612
142k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
142k
      break;
614
0
    }
615
170k
    case Type::AFV2: {
616
170k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
170k
      break;
618
0
    }
619
193k
    case Type::AFV3: {
620
193k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
193k
      break;
622
0
    }
623
19.5k
    case Type::DCT64X64: {
624
19.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
19.5k
                                 scratch_space);
626
19.5k
      break;
627
0
    }
628
4.76k
    case Type::DCT64X32: {
629
4.76k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
4.76k
                                 scratch_space);
631
4.76k
      break;
632
0
    }
633
7.35k
    case Type::DCT32X64: {
634
7.35k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
7.35k
                                 scratch_space);
636
7.35k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
4.77M
  }
669
4.77M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
109M
                                          float* JXL_RESTRICT scratch_space) {
462
109M
  using Type = AcStrategyType;
463
109M
  switch (strategy) {
464
8.69M
    case Type::IDENTITY: {
465
26.0M
      for (size_t y = 0; y < 2; y++) {
466
52.1M
        for (size_t x = 0; x < 2; x++) {
467
34.7M
          float block_dc = 0;
468
173M
          for (size_t iy = 0; iy < 4; iy++) {
469
695M
            for (size_t ix = 0; ix < 4; ix++) {
470
556M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
556M
            }
472
139M
          }
473
34.7M
          block_dc *= 1.0f / 16;
474
173M
          for (size_t iy = 0; iy < 4; iy++) {
475
695M
            for (size_t ix = 0; ix < 4; ix++) {
476
556M
              if (ix == 1 && iy == 1) continue;
477
521M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
521M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
521M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
521M
            }
481
139M
          }
482
34.7M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
34.7M
          coefficients[y * 8 + x] = block_dc;
484
34.7M
        }
485
17.3M
      }
486
8.69M
      float block00 = coefficients[0];
487
8.69M
      float block01 = coefficients[1];
488
8.69M
      float block10 = coefficients[8];
489
8.69M
      float block11 = coefficients[9];
490
8.69M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
8.69M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
8.69M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
8.69M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
8.69M
      break;
495
0
    }
496
8.69M
    case Type::DCT8X4: {
497
26.0M
      for (size_t x = 0; x < 2; x++) {
498
17.3M
        HWY_ALIGN float block[4 * 8];
499
17.3M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
17.3M
                                 scratch_space);
501
86.9M
        for (size_t iy = 0; iy < 4; iy++) {
502
626M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
556M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
556M
          }
506
69.5M
        }
507
17.3M
      }
508
8.69M
      float block0 = coefficients[0];
509
8.69M
      float block1 = coefficients[8];
510
8.69M
      coefficients[0] = (block0 + block1) * 0.5f;
511
8.69M
      coefficients[8] = (block0 - block1) * 0.5f;
512
8.69M
      break;
513
0
    }
514
8.69M
    case Type::DCT4X8: {
515
26.0M
      for (size_t y = 0; y < 2; y++) {
516
17.3M
        HWY_ALIGN float block[4 * 8];
517
17.3M
        ComputeScaledDCT<4, 8>()(
518
17.3M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
17.3M
            scratch_space);
520
86.9M
        for (size_t iy = 0; iy < 4; iy++) {
521
626M
          for (size_t ix = 0; ix < 8; ix++) {
522
556M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
556M
          }
524
69.5M
        }
525
17.3M
      }
526
8.69M
      float block0 = coefficients[0];
527
8.69M
      float block1 = coefficients[8];
528
8.69M
      coefficients[0] = (block0 + block1) * 0.5f;
529
8.69M
      coefficients[8] = (block0 - block1) * 0.5f;
530
8.69M
      break;
531
0
    }
532
8.69M
    case Type::DCT4X4: {
533
26.0M
      for (size_t y = 0; y < 2; y++) {
534
52.1M
        for (size_t x = 0; x < 2; x++) {
535
34.7M
          HWY_ALIGN float block[4 * 4];
536
34.7M
          ComputeScaledDCT<4, 4>()(
537
34.7M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
34.7M
              block, scratch_space);
539
173M
          for (size_t iy = 0; iy < 4; iy++) {
540
695M
            for (size_t ix = 0; ix < 4; ix++) {
541
556M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
556M
            }
543
139M
          }
544
34.7M
        }
545
17.3M
      }
546
8.69M
      float block00 = coefficients[0];
547
8.69M
      float block01 = coefficients[1];
548
8.69M
      float block10 = coefficients[8];
549
8.69M
      float block11 = coefficients[9];
550
8.69M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
8.69M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
8.69M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
8.69M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
8.69M
      break;
555
0
    }
556
8.69M
    case Type::DCT2X2: {
557
8.69M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
8.69M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
8.69M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
8.69M
      break;
561
0
    }
562
3.68M
    case Type::DCT16X16: {
563
3.68M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
3.68M
                                 scratch_space);
565
3.68M
      break;
566
0
    }
567
7.15M
    case Type::DCT16X8: {
568
7.15M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
7.15M
                                scratch_space);
570
7.15M
      break;
571
0
    }
572
7.17M
    case Type::DCT8X16: {
573
7.17M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
7.17M
                                scratch_space);
575
7.17M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.45M
    case Type::DCT32X16: {
588
1.45M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.45M
                                 scratch_space);
590
1.45M
      break;
591
0
    }
592
1.45M
    case Type::DCT16X32: {
593
1.45M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.45M
                                 scratch_space);
595
1.45M
      break;
596
0
    }
597
750k
    case Type::DCT32X32: {
598
750k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
750k
                                 scratch_space);
600
750k
      break;
601
0
    }
602
8.69M
    case Type::DCT: {
603
8.69M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
8.69M
                               scratch_space);
605
8.69M
      break;
606
0
    }
607
8.69M
    case Type::AFV0: {
608
8.69M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
8.69M
      break;
610
0
    }
611
8.69M
    case Type::AFV1: {
612
8.69M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
8.69M
      break;
614
0
    }
615
8.69M
    case Type::AFV2: {
616
8.69M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
8.69M
      break;
618
0
    }
619
8.69M
    case Type::AFV3: {
620
8.69M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
8.69M
      break;
622
0
    }
623
121k
    case Type::DCT64X64: {
624
121k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
121k
                                 scratch_space);
626
121k
      break;
627
0
    }
628
452k
    case Type::DCT64X32: {
629
452k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
452k
                                 scratch_space);
631
452k
      break;
632
0
    }
633
308k
    case Type::DCT32X64: {
634
308k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
308k
                                 scratch_space);
636
308k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
109M
  }
669
109M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
13.4M
                                          float* JXL_RESTRICT scratch_space) {
462
13.4M
  using Type = AcStrategyType;
463
13.4M
  switch (strategy) {
464
781k
    case Type::IDENTITY: {
465
2.34M
      for (size_t y = 0; y < 2; y++) {
466
4.68M
        for (size_t x = 0; x < 2; x++) {
467
3.12M
          float block_dc = 0;
468
15.6M
          for (size_t iy = 0; iy < 4; iy++) {
469
62.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
49.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
49.9M
            }
472
12.4M
          }
473
3.12M
          block_dc *= 1.0f / 16;
474
15.6M
          for (size_t iy = 0; iy < 4; iy++) {
475
62.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
49.9M
              if (ix == 1 && iy == 1) continue;
477
46.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
46.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
46.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
46.8M
            }
481
12.4M
          }
482
3.12M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.12M
          coefficients[y * 8 + x] = block_dc;
484
3.12M
        }
485
1.56M
      }
486
781k
      float block00 = coefficients[0];
487
781k
      float block01 = coefficients[1];
488
781k
      float block10 = coefficients[8];
489
781k
      float block11 = coefficients[9];
490
781k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
781k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
781k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
781k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
781k
      break;
495
0
    }
496
243k
    case Type::DCT8X4: {
497
729k
      for (size_t x = 0; x < 2; x++) {
498
486k
        HWY_ALIGN float block[4 * 8];
499
486k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
486k
                                 scratch_space);
501
2.43M
        for (size_t iy = 0; iy < 4; iy++) {
502
17.5M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
15.5M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
15.5M
          }
506
1.94M
        }
507
486k
      }
508
243k
      float block0 = coefficients[0];
509
243k
      float block1 = coefficients[8];
510
243k
      coefficients[0] = (block0 + block1) * 0.5f;
511
243k
      coefficients[8] = (block0 - block1) * 0.5f;
512
243k
      break;
513
0
    }
514
111k
    case Type::DCT4X8: {
515
335k
      for (size_t y = 0; y < 2; y++) {
516
223k
        HWY_ALIGN float block[4 * 8];
517
223k
        ComputeScaledDCT<4, 8>()(
518
223k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
223k
            scratch_space);
520
1.11M
        for (size_t iy = 0; iy < 4; iy++) {
521
8.04M
          for (size_t ix = 0; ix < 8; ix++) {
522
7.15M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
7.15M
          }
524
894k
        }
525
223k
      }
526
111k
      float block0 = coefficients[0];
527
111k
      float block1 = coefficients[8];
528
111k
      coefficients[0] = (block0 + block1) * 0.5f;
529
111k
      coefficients[8] = (block0 - block1) * 0.5f;
530
111k
      break;
531
0
    }
532
1.12k
    case Type::DCT4X4: {
533
3.38k
      for (size_t y = 0; y < 2; y++) {
534
6.76k
        for (size_t x = 0; x < 2; x++) {
535
4.51k
          HWY_ALIGN float block[4 * 4];
536
4.51k
          ComputeScaledDCT<4, 4>()(
537
4.51k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
4.51k
              block, scratch_space);
539
22.5k
          for (size_t iy = 0; iy < 4; iy++) {
540
90.2k
            for (size_t ix = 0; ix < 4; ix++) {
541
72.1k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
72.1k
            }
543
18.0k
          }
544
4.51k
        }
545
2.25k
      }
546
1.12k
      float block00 = coefficients[0];
547
1.12k
      float block01 = coefficients[1];
548
1.12k
      float block10 = coefficients[8];
549
1.12k
      float block11 = coefficients[9];
550
1.12k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.12k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.12k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.12k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.12k
      break;
555
0
    }
556
826k
    case Type::DCT2X2: {
557
826k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
826k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
826k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
826k
      break;
561
0
    }
562
145k
    case Type::DCT16X16: {
563
145k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
145k
                                 scratch_space);
565
145k
      break;
566
0
    }
567
236k
    case Type::DCT16X8: {
568
236k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
236k
                                scratch_space);
570
236k
      break;
571
0
    }
572
279k
    case Type::DCT8X16: {
573
279k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
279k
                                scratch_space);
575
279k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
41.0k
    case Type::DCT32X16: {
588
41.0k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
41.0k
                                 scratch_space);
590
41.0k
      break;
591
0
    }
592
70.2k
    case Type::DCT16X32: {
593
70.2k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
70.2k
                                 scratch_space);
595
70.2k
      break;
596
0
    }
597
38.9k
    case Type::DCT32X32: {
598
38.9k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
38.9k
                                 scratch_space);
600
38.9k
      break;
601
0
    }
602
9.98M
    case Type::DCT: {
603
9.98M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
9.98M
                               scratch_space);
605
9.98M
      break;
606
0
    }
607
178k
    case Type::AFV0: {
608
178k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
178k
      break;
610
0
    }
611
142k
    case Type::AFV1: {
612
142k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
142k
      break;
614
0
    }
615
170k
    case Type::AFV2: {
616
170k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
170k
      break;
618
0
    }
619
193k
    case Type::AFV3: {
620
193k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
193k
      break;
622
0
    }
623
19.5k
    case Type::DCT64X64: {
624
19.5k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
19.5k
                                 scratch_space);
626
19.5k
      break;
627
0
    }
628
4.76k
    case Type::DCT64X32: {
629
4.76k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
4.76k
                                 scratch_space);
631
4.76k
      break;
632
0
    }
633
7.35k
    case Type::DCT32X64: {
634
7.35k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
7.35k
                                 scratch_space);
636
7.35k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
13.4M
  }
669
13.4M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
18.2M
                                              float* scratch_space) {
676
18.2M
  using Type = AcStrategyType;
677
18.2M
  switch (strategy) {
678
472k
    case Type::DCT16X8: {
679
472k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
472k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
472k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
472k
      break;
683
0
    }
684
559k
    case Type::DCT8X16: {
685
559k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
559k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
559k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
559k
      break;
689
0
    }
690
290k
    case Type::DCT16X16: {
691
290k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
290k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
290k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
290k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
82.0k
    case Type::DCT32X16: {
709
82.0k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
82.0k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
82.0k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
82.0k
      break;
713
0
    }
714
140k
    case Type::DCT16X32: {
715
140k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
140k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
140k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
140k
      break;
719
0
    }
720
77.8k
    case Type::DCT32X32: {
721
77.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
77.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
77.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
77.8k
      break;
725
0
    }
726
9.53k
    case Type::DCT64X32: {
727
9.53k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
9.53k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
9.53k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
9.53k
      break;
731
0
    }
732
14.7k
    case Type::DCT32X64: {
733
14.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
14.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
14.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
14.7k
      break;
737
0
    }
738
39.1k
    case Type::DCT64X64: {
739
39.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
39.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
39.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
39.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
11.2M
    case Type::DCT:
787
12.9M
    case Type::DCT2X2:
788
12.9M
    case Type::DCT4X4:
789
13.1M
    case Type::DCT4X8:
790
13.6M
    case Type::DCT8X4:
791
13.9M
    case Type::AFV0:
792
14.2M
    case Type::AFV1:
793
14.6M
    case Type::AFV2:
794
15.0M
    case Type::AFV3:
795
16.5M
    case Type::IDENTITY:
796
16.5M
      dc[0] = block[0];
797
16.5M
      break;
798
18.2M
  }
799
18.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
4.77M
                                              float* scratch_space) {
676
4.77M
  using Type = AcStrategyType;
677
4.77M
  switch (strategy) {
678
236k
    case Type::DCT16X8: {
679
236k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
236k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
236k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
236k
      break;
683
0
    }
684
279k
    case Type::DCT8X16: {
685
279k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
279k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
279k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
279k
      break;
689
0
    }
690
145k
    case Type::DCT16X16: {
691
145k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
145k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
145k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
145k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
41.0k
    case Type::DCT32X16: {
709
41.0k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
41.0k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
41.0k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
41.0k
      break;
713
0
    }
714
70.2k
    case Type::DCT16X32: {
715
70.2k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
70.2k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
70.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
70.2k
      break;
719
0
    }
720
38.9k
    case Type::DCT32X32: {
721
38.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
38.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
38.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
38.9k
      break;
725
0
    }
726
4.76k
    case Type::DCT64X32: {
727
4.76k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
4.76k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
4.76k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
4.76k
      break;
731
0
    }
732
7.35k
    case Type::DCT32X64: {
733
7.35k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
7.35k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
7.35k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
7.35k
      break;
737
0
    }
738
19.5k
    case Type::DCT64X64: {
739
19.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
19.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
19.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
19.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.28M
    case Type::DCT:
787
2.10M
    case Type::DCT2X2:
788
2.11M
    case Type::DCT4X4:
789
2.22M
    case Type::DCT4X8:
790
2.46M
    case Type::DCT8X4:
791
2.64M
    case Type::AFV0:
792
2.78M
    case Type::AFV1:
793
2.95M
    case Type::AFV2:
794
3.15M
    case Type::AFV3:
795
3.93M
    case Type::IDENTITY:
796
3.93M
      dc[0] = block[0];
797
3.93M
      break;
798
4.77M
  }
799
4.77M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
13.4M
                                              float* scratch_space) {
676
13.4M
  using Type = AcStrategyType;
677
13.4M
  switch (strategy) {
678
236k
    case Type::DCT16X8: {
679
236k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
236k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
236k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
236k
      break;
683
0
    }
684
279k
    case Type::DCT8X16: {
685
279k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
279k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
279k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
279k
      break;
689
0
    }
690
145k
    case Type::DCT16X16: {
691
145k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
145k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
145k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
145k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
41.0k
    case Type::DCT32X16: {
709
41.0k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
41.0k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
41.0k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
41.0k
      break;
713
0
    }
714
70.2k
    case Type::DCT16X32: {
715
70.2k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
70.2k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
70.2k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
70.2k
      break;
719
0
    }
720
38.9k
    case Type::DCT32X32: {
721
38.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
38.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
38.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
38.9k
      break;
725
0
    }
726
4.76k
    case Type::DCT64X32: {
727
4.76k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
4.76k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
4.76k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
4.76k
      break;
731
0
    }
732
7.35k
    case Type::DCT32X64: {
733
7.35k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
7.35k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
7.35k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
7.35k
      break;
737
0
    }
738
19.5k
    case Type::DCT64X64: {
739
19.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
19.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
19.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
19.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
9.98M
    case Type::DCT:
787
10.8M
    case Type::DCT2X2:
788
10.8M
    case Type::DCT4X4:
789
10.9M
    case Type::DCT4X8:
790
11.1M
    case Type::DCT8X4:
791
11.3M
    case Type::AFV0:
792
11.4M
    case Type::AFV1:
793
11.6M
    case Type::AFV2:
794
11.8M
    case Type::AFV3:
795
12.6M
    case Type::IDENTITY:
796
12.6M
      dc[0] = block[0];
797
12.6M
      break;
798
13.4M
  }
799
13.4M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_