Coverage Report

Created: 2025-11-16 07:22

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.48M
                                   const size_t output_stride, float* scratch) {
40
2.48M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.48M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.48M
  float* block = scratch;
43
2.48M
  if (ROWS < COLS) {
44
2.36M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
5.51M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.18M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.18M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.18M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.18M
      }
50
1.33M
    }
51
1.45M
  } else {
52
4.74M
    for (size_t y = 0; y < LF_COLS; y++) {
53
16.2M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
12.9M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
12.9M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
12.9M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
12.9M
      }
58
3.28M
    }
59
1.45M
  }
60
61
2.48M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.48M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.48M
                                  scratch_space);
64
2.48M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
297k
                                   const size_t output_stride, float* scratch) {
40
297k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
297k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
297k
  float* block = scratch;
43
297k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
297k
  } else {
52
595k
    for (size_t y = 0; y < LF_COLS; y++) {
53
892k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
595k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
595k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
595k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
595k
      }
58
297k
    }
59
297k
  }
60
61
297k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
297k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
297k
                                  scratch_space);
64
297k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
378k
                                   const size_t output_stride, float* scratch) {
40
378k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
378k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
378k
  float* block = scratch;
43
378k
  if (ROWS < COLS) {
44
757k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.13M
      for (size_t x = 0; x < LF_COLS; x++) {
46
757k
        block[y * COLS + x] = input[y * input_stride + x] *
47
757k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
757k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
757k
      }
50
378k
    }
51
378k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
378k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
378k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
378k
                                  scratch_space);
64
378k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
178k
                                   const size_t output_stride, float* scratch) {
40
178k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
178k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
178k
  float* block = scratch;
43
178k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
178k
  } else {
52
535k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.07M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
714k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
714k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
714k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
714k
      }
58
357k
    }
59
178k
  }
60
61
178k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
178k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
178k
                                  scratch_space);
64
178k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
80.1k
                                   const size_t output_stride, float* scratch) {
40
80.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
80.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
80.1k
  float* block = scratch;
43
80.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
80.1k
  } else {
52
240k
    for (size_t y = 0; y < LF_COLS; y++) {
53
801k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
641k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
641k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
641k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
641k
      }
58
160k
    }
59
80.1k
  }
60
61
80.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
80.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
80.1k
                                  scratch_space);
64
80.1k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
123k
                                   const size_t output_stride, float* scratch) {
40
123k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
123k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
123k
  float* block = scratch;
43
123k
  if (ROWS < COLS) {
44
369k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.23M
      for (size_t x = 0; x < LF_COLS; x++) {
46
984k
        block[y * COLS + x] = input[y * input_stride + x] *
47
984k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
984k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
984k
      }
50
246k
    }
51
123k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
123k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
123k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
123k
                                  scratch_space);
64
123k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
130k
                                   const size_t output_stride, float* scratch) {
40
130k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
130k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
130k
  float* block = scratch;
43
130k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
130k
  } else {
52
654k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.61M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.09M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.09M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.09M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.09M
      }
58
523k
    }
59
130k
  }
60
61
130k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
130k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
130k
                                  scratch_space);
64
130k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.05k
                                   const size_t output_stride, float* scratch) {
40
6.05k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.05k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.05k
  float* block = scratch;
43
6.05k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.05k
  } else {
52
30.2k
    for (size_t y = 0; y < LF_COLS; y++) {
53
217k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
193k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
193k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
193k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
193k
      }
58
24.2k
    }
59
6.05k
  }
60
61
6.05k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.05k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.05k
                                  scratch_space);
64
6.05k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.9k
                                   const size_t output_stride, float* scratch) {
40
10.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.9k
  float* block = scratch;
43
10.9k
  if (ROWS < COLS) {
44
54.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
392k
      for (size_t x = 0; x < LF_COLS; x++) {
46
349k
        block[y * COLS + x] = input[y * input_stride + x] *
47
349k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
349k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
349k
      }
50
43.6k
    }
51
10.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.9k
                                  scratch_space);
64
10.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
34.9k
                                   const size_t output_stride, float* scratch) {
40
34.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
34.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
34.9k
  float* block = scratch;
43
34.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
34.9k
  } else {
52
314k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.51M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.23M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.23M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.23M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.23M
      }
58
279k
    }
59
34.9k
  }
60
61
34.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
34.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
34.9k
                                  scratch_space);
64
34.9k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
297k
                                   const size_t output_stride, float* scratch) {
40
297k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
297k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
297k
  float* block = scratch;
43
297k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
297k
  } else {
52
595k
    for (size_t y = 0; y < LF_COLS; y++) {
53
892k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
595k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
595k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
595k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
595k
      }
58
297k
    }
59
297k
  }
60
61
297k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
297k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
297k
                                  scratch_space);
64
297k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
378k
                                   const size_t output_stride, float* scratch) {
40
378k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
378k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
378k
  float* block = scratch;
43
378k
  if (ROWS < COLS) {
44
757k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.13M
      for (size_t x = 0; x < LF_COLS; x++) {
46
757k
        block[y * COLS + x] = input[y * input_stride + x] *
47
757k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
757k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
757k
      }
50
378k
    }
51
378k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
378k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
378k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
378k
                                  scratch_space);
64
378k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
178k
                                   const size_t output_stride, float* scratch) {
40
178k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
178k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
178k
  float* block = scratch;
43
178k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
178k
  } else {
52
535k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.07M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
714k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
714k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
714k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
714k
      }
58
357k
    }
59
178k
  }
60
61
178k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
178k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
178k
                                  scratch_space);
64
178k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
80.1k
                                   const size_t output_stride, float* scratch) {
40
80.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
80.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
80.1k
  float* block = scratch;
43
80.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
80.1k
  } else {
52
240k
    for (size_t y = 0; y < LF_COLS; y++) {
53
801k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
641k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
641k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
641k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
641k
      }
58
160k
    }
59
80.1k
  }
60
61
80.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
80.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
80.1k
                                  scratch_space);
64
80.1k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
123k
                                   const size_t output_stride, float* scratch) {
40
123k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
123k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
123k
  float* block = scratch;
43
123k
  if (ROWS < COLS) {
44
369k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.23M
      for (size_t x = 0; x < LF_COLS; x++) {
46
984k
        block[y * COLS + x] = input[y * input_stride + x] *
47
984k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
984k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
984k
      }
50
246k
    }
51
123k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
123k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
123k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
123k
                                  scratch_space);
64
123k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
130k
                                   const size_t output_stride, float* scratch) {
40
130k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
130k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
130k
  float* block = scratch;
43
130k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
130k
  } else {
52
654k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.61M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.09M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.09M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.09M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.09M
      }
58
523k
    }
59
130k
  }
60
61
130k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
130k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
130k
                                  scratch_space);
64
130k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.05k
                                   const size_t output_stride, float* scratch) {
40
6.05k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.05k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.05k
  float* block = scratch;
43
6.05k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.05k
  } else {
52
30.2k
    for (size_t y = 0; y < LF_COLS; y++) {
53
217k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
193k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
193k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
193k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
193k
      }
58
24.2k
    }
59
6.05k
  }
60
61
6.05k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.05k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.05k
                                  scratch_space);
64
6.05k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.9k
                                   const size_t output_stride, float* scratch) {
40
10.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.9k
  float* block = scratch;
43
10.9k
  if (ROWS < COLS) {
44
54.5k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
392k
      for (size_t x = 0; x < LF_COLS; x++) {
46
349k
        block[y * COLS + x] = input[y * input_stride + x] *
47
349k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
349k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
349k
      }
50
43.6k
    }
51
10.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.9k
                                  scratch_space);
64
10.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
34.9k
                                   const size_t output_stride, float* scratch) {
40
34.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
34.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
34.9k
  float* block = scratch;
43
34.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
34.9k
  } else {
52
314k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.51M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.23M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.23M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.23M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.23M
      }
58
279k
    }
59
34.9k
  }
60
61
34.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
34.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
34.9k
                                  scratch_space);
64
34.9k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
50.4M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
50.4M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
50.4M
  static_assert(S % 2 == 0, "S should be even");
70
50.4M
  float temp[kDCTBlockSize];
71
50.4M
  constexpr size_t num_2x2 = S / 2;
72
168M
  for (size_t y = 0; y < num_2x2; y++) {
73
471M
    for (size_t x = 0; x < num_2x2; x++) {
74
353M
      float c00 = block[y * 2 * stride + x * 2];
75
353M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
353M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
353M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
353M
      float r00 = c00 + c01 + c10 + c11;
79
353M
      float r01 = c00 + c01 - c10 - c11;
80
353M
      float r10 = c00 - c01 + c10 - c11;
81
353M
      float r11 = c00 - c01 - c10 + c11;
82
353M
      r00 *= 0.25f;
83
353M
      r01 *= 0.25f;
84
353M
      r10 *= 0.25f;
85
353M
      r11 *= 0.25f;
86
353M
      temp[y * kBlockDim + x] = r00;
87
353M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
353M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
353M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
353M
    }
91
117M
  }
92
286M
  for (size_t y = 0; y < S; y++) {
93
1.64G
    for (size_t x = 0; x < S; x++) {
94
1.41G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.41G
    }
96
235M
  }
97
50.4M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
5.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.0M
      float c00 = block[y * 2 * stride + x * 2];
75
16.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.0M
      float r00 = c00 + c01 + c10 + c11;
79
16.0M
      float r01 = c00 + c01 - c10 - c11;
80
16.0M
      float r10 = c00 - c01 + c10 - c11;
81
16.0M
      float r11 = c00 - c01 - c10 + c11;
82
16.0M
      r00 *= 0.25f;
83
16.0M
      r01 *= 0.25f;
84
16.0M
      r10 *= 0.25f;
85
16.0M
      r11 *= 0.25f;
86
16.0M
      temp[y * kBlockDim + x] = r00;
87
16.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.0M
    }
91
4.00M
  }
92
9.00M
  for (size_t y = 0; y < S; y++) {
93
72.0M
    for (size_t x = 0; x < S; x++) {
94
64.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
64.0M
    }
96
8.00M
  }
97
1.00M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
3.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.00M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.00M
      float c00 = block[y * 2 * stride + x * 2];
75
4.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.00M
      float r00 = c00 + c01 + c10 + c11;
79
4.00M
      float r01 = c00 + c01 - c10 - c11;
80
4.00M
      float r10 = c00 - c01 + c10 - c11;
81
4.00M
      float r11 = c00 - c01 - c10 + c11;
82
4.00M
      r00 *= 0.25f;
83
4.00M
      r01 *= 0.25f;
84
4.00M
      r10 *= 0.25f;
85
4.00M
      r11 *= 0.25f;
86
4.00M
      temp[y * kBlockDim + x] = r00;
87
4.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.00M
    }
91
2.00M
  }
92
5.00M
  for (size_t y = 0; y < S; y++) {
93
20.0M
    for (size_t x = 0; x < S; x++) {
94
16.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.0M
    }
96
4.00M
  }
97
1.00M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
2.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.00M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.00M
      float c00 = block[y * 2 * stride + x * 2];
75
1.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.00M
      float r00 = c00 + c01 + c10 + c11;
79
1.00M
      float r01 = c00 + c01 - c10 - c11;
80
1.00M
      float r10 = c00 - c01 + c10 - c11;
81
1.00M
      float r11 = c00 - c01 - c10 + c11;
82
1.00M
      r00 *= 0.25f;
83
1.00M
      r01 *= 0.25f;
84
1.00M
      r10 *= 0.25f;
85
1.00M
      r11 *= 0.25f;
86
1.00M
      temp[y * kBlockDim + x] = r00;
87
1.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.00M
    }
91
1.00M
  }
92
3.00M
  for (size_t y = 0; y < S; y++) {
93
6.00M
    for (size_t x = 0; x < S; x++) {
94
4.00M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.00M
    }
96
2.00M
  }
97
1.00M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.8M
  static_assert(S % 2 == 0, "S should be even");
70
14.8M
  float temp[kDCTBlockSize];
71
14.8M
  constexpr size_t num_2x2 = S / 2;
72
74.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
296M
    for (size_t x = 0; x < num_2x2; x++) {
74
237M
      float c00 = block[y * 2 * stride + x * 2];
75
237M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
237M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
237M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
237M
      float r00 = c00 + c01 + c10 + c11;
79
237M
      float r01 = c00 + c01 - c10 - c11;
80
237M
      float r10 = c00 - c01 + c10 - c11;
81
237M
      float r11 = c00 - c01 - c10 + c11;
82
237M
      r00 *= 0.25f;
83
237M
      r01 *= 0.25f;
84
237M
      r10 *= 0.25f;
85
237M
      r11 *= 0.25f;
86
237M
      temp[y * kBlockDim + x] = r00;
87
237M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
237M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
237M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
237M
    }
91
59.3M
  }
92
133M
  for (size_t y = 0; y < S; y++) {
93
1.06G
    for (size_t x = 0; x < S; x++) {
94
949M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
949M
    }
96
118M
  }
97
14.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.8M
  static_assert(S % 2 == 0, "S should be even");
70
14.8M
  float temp[kDCTBlockSize];
71
14.8M
  constexpr size_t num_2x2 = S / 2;
72
44.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
88.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
59.3M
      float c00 = block[y * 2 * stride + x * 2];
75
59.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
59.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
59.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
59.3M
      float r00 = c00 + c01 + c10 + c11;
79
59.3M
      float r01 = c00 + c01 - c10 - c11;
80
59.3M
      float r10 = c00 - c01 + c10 - c11;
81
59.3M
      float r11 = c00 - c01 - c10 + c11;
82
59.3M
      r00 *= 0.25f;
83
59.3M
      r01 *= 0.25f;
84
59.3M
      r10 *= 0.25f;
85
59.3M
      r11 *= 0.25f;
86
59.3M
      temp[y * kBlockDim + x] = r00;
87
59.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
59.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
59.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
59.3M
    }
91
29.6M
  }
92
74.1M
  for (size_t y = 0; y < S; y++) {
93
296M
    for (size_t x = 0; x < S; x++) {
94
237M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
237M
    }
96
59.3M
  }
97
14.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
14.8M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
14.8M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
14.8M
  static_assert(S % 2 == 0, "S should be even");
70
14.8M
  float temp[kDCTBlockSize];
71
14.8M
  constexpr size_t num_2x2 = S / 2;
72
29.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
29.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.8M
      float c00 = block[y * 2 * stride + x * 2];
75
14.8M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
14.8M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
14.8M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
14.8M
      float r00 = c00 + c01 + c10 + c11;
79
14.8M
      float r01 = c00 + c01 - c10 - c11;
80
14.8M
      float r10 = c00 - c01 + c10 - c11;
81
14.8M
      float r11 = c00 - c01 - c10 + c11;
82
14.8M
      r00 *= 0.25f;
83
14.8M
      r01 *= 0.25f;
84
14.8M
      r10 *= 0.25f;
85
14.8M
      r11 *= 0.25f;
86
14.8M
      temp[y * kBlockDim + x] = r00;
87
14.8M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
14.8M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
14.8M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
14.8M
    }
91
14.8M
  }
92
44.4M
  for (size_t y = 0; y < S; y++) {
93
88.9M
    for (size_t x = 0; x < S; x++) {
94
59.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
59.3M
    }
96
29.6M
  }
97
14.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
5.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.0M
      float c00 = block[y * 2 * stride + x * 2];
75
16.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.0M
      float r00 = c00 + c01 + c10 + c11;
79
16.0M
      float r01 = c00 + c01 - c10 - c11;
80
16.0M
      float r10 = c00 - c01 + c10 - c11;
81
16.0M
      float r11 = c00 - c01 - c10 + c11;
82
16.0M
      r00 *= 0.25f;
83
16.0M
      r01 *= 0.25f;
84
16.0M
      r10 *= 0.25f;
85
16.0M
      r11 *= 0.25f;
86
16.0M
      temp[y * kBlockDim + x] = r00;
87
16.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.0M
    }
91
4.00M
  }
92
9.00M
  for (size_t y = 0; y < S; y++) {
93
72.0M
    for (size_t x = 0; x < S; x++) {
94
64.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
64.0M
    }
96
8.00M
  }
97
1.00M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
3.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.00M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.00M
      float c00 = block[y * 2 * stride + x * 2];
75
4.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.00M
      float r00 = c00 + c01 + c10 + c11;
79
4.00M
      float r01 = c00 + c01 - c10 - c11;
80
4.00M
      float r10 = c00 - c01 + c10 - c11;
81
4.00M
      float r11 = c00 - c01 - c10 + c11;
82
4.00M
      r00 *= 0.25f;
83
4.00M
      r01 *= 0.25f;
84
4.00M
      r10 *= 0.25f;
85
4.00M
      r11 *= 0.25f;
86
4.00M
      temp[y * kBlockDim + x] = r00;
87
4.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.00M
    }
91
2.00M
  }
92
5.00M
  for (size_t y = 0; y < S; y++) {
93
20.0M
    for (size_t x = 0; x < S; x++) {
94
16.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.0M
    }
96
4.00M
  }
97
1.00M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.00M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.00M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.00M
  static_assert(S % 2 == 0, "S should be even");
70
1.00M
  float temp[kDCTBlockSize];
71
1.00M
  constexpr size_t num_2x2 = S / 2;
72
2.00M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.00M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.00M
      float c00 = block[y * 2 * stride + x * 2];
75
1.00M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.00M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.00M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.00M
      float r00 = c00 + c01 + c10 + c11;
79
1.00M
      float r01 = c00 + c01 - c10 - c11;
80
1.00M
      float r10 = c00 - c01 + c10 - c11;
81
1.00M
      float r11 = c00 - c01 - c10 + c11;
82
1.00M
      r00 *= 0.25f;
83
1.00M
      r01 *= 0.25f;
84
1.00M
      r10 *= 0.25f;
85
1.00M
      r11 *= 0.25f;
86
1.00M
      temp[y * kBlockDim + x] = r00;
87
1.00M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.00M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.00M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.00M
    }
91
1.00M
  }
92
3.00M
  for (size_t y = 0; y < S; y++) {
93
6.00M
    for (size_t x = 0; x < S; x++) {
94
4.00M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.00M
    }
96
2.00M
  }
97
1.00M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
61.2M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
61.2M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
61.2M
      {
102
61.2M
          0.2500000000000000,
103
61.2M
          0.8769029297991420f,
104
61.2M
          0.0000000000000000,
105
61.2M
          0.0000000000000000,
106
61.2M
          0.0000000000000000,
107
61.2M
          -0.4105377591765233f,
108
61.2M
          0.0000000000000000,
109
61.2M
          0.0000000000000000,
110
61.2M
          0.0000000000000000,
111
61.2M
          0.0000000000000000,
112
61.2M
          0.0000000000000000,
113
61.2M
          0.0000000000000000,
114
61.2M
          0.0000000000000000,
115
61.2M
          0.0000000000000000,
116
61.2M
          0.0000000000000000,
117
61.2M
          0.0000000000000000,
118
61.2M
      },
119
61.2M
      {
120
61.2M
          0.2500000000000000,
121
61.2M
          0.2206518106944235f,
122
61.2M
          0.0000000000000000,
123
61.2M
          0.0000000000000000,
124
61.2M
          -0.7071067811865474f,
125
61.2M
          0.6235485373547691f,
126
61.2M
          0.0000000000000000,
127
61.2M
          0.0000000000000000,
128
61.2M
          0.0000000000000000,
129
61.2M
          0.0000000000000000,
130
61.2M
          0.0000000000000000,
131
61.2M
          0.0000000000000000,
132
61.2M
          0.0000000000000000,
133
61.2M
          0.0000000000000000,
134
61.2M
          0.0000000000000000,
135
61.2M
          0.0000000000000000,
136
61.2M
      },
137
61.2M
      {
138
61.2M
          0.2500000000000000,
139
61.2M
          -0.1014005039375376f,
140
61.2M
          0.4067007583026075f,
141
61.2M
          -0.2125574805828875f,
142
61.2M
          0.0000000000000000,
143
61.2M
          -0.0643507165794627f,
144
61.2M
          -0.4517556589999482f,
145
61.2M
          -0.3046847507248690f,
146
61.2M
          0.3017929516615495f,
147
61.2M
          0.4082482904638627f,
148
61.2M
          0.1747866975480809f,
149
61.2M
          -0.2110560104933578f,
150
61.2M
          -0.1426608480880726f,
151
61.2M
          -0.1381354035075859f,
152
61.2M
          -0.1743760259965107f,
153
61.2M
          0.1135498731499434f,
154
61.2M
      },
155
61.2M
      {
156
61.2M
          0.2500000000000000,
157
61.2M
          -0.1014005039375375f,
158
61.2M
          0.4444481661973445f,
159
61.2M
          0.3085497062849767f,
160
61.2M
          0.0000000000000000f,
161
61.2M
          -0.0643507165794627f,
162
61.2M
          0.1585450355184006f,
163
61.2M
          0.5112616136591823f,
164
61.2M
          0.2579236279634118f,
165
61.2M
          0.0000000000000000,
166
61.2M
          0.0812611176717539f,
167
61.2M
          0.1856718091610980f,
168
61.2M
          -0.3416446842253372f,
169
61.2M
          0.3302282550303788f,
170
61.2M
          0.0702790691196284f,
171
61.2M
          -0.0741750459581035f,
172
61.2M
      },
173
61.2M
      {
174
61.2M
          0.2500000000000000,
175
61.2M
          0.2206518106944236f,
176
61.2M
          0.0000000000000000,
177
61.2M
          0.0000000000000000,
178
61.2M
          0.7071067811865476f,
179
61.2M
          0.6235485373547694f,
180
61.2M
          0.0000000000000000,
181
61.2M
          0.0000000000000000,
182
61.2M
          0.0000000000000000,
183
61.2M
          0.0000000000000000,
184
61.2M
          0.0000000000000000,
185
61.2M
          0.0000000000000000,
186
61.2M
          0.0000000000000000,
187
61.2M
          0.0000000000000000,
188
61.2M
          0.0000000000000000,
189
61.2M
          0.0000000000000000,
190
61.2M
      },
191
61.2M
      {
192
61.2M
          0.2500000000000000,
193
61.2M
          -0.1014005039375378f,
194
61.2M
          0.0000000000000000,
195
61.2M
          0.4706702258572536f,
196
61.2M
          0.0000000000000000,
197
61.2M
          -0.0643507165794628f,
198
61.2M
          -0.0403851516082220f,
199
61.2M
          0.0000000000000000,
200
61.2M
          0.1627234014286620f,
201
61.2M
          0.0000000000000000,
202
61.2M
          0.0000000000000000,
203
61.2M
          0.0000000000000000,
204
61.2M
          0.7367497537172237f,
205
61.2M
          0.0875511500058708f,
206
61.2M
          -0.2921026642334881f,
207
61.2M
          0.1940289303259434f,
208
61.2M
      },
209
61.2M
      {
210
61.2M
          0.2500000000000000,
211
61.2M
          -0.1014005039375377f,
212
61.2M
          0.1957439937204294f,
213
61.2M
          -0.1621205195722993f,
214
61.2M
          0.0000000000000000,
215
61.2M
          -0.0643507165794628f,
216
61.2M
          0.0074182263792424f,
217
61.2M
          -0.2904801297289980f,
218
61.2M
          0.0952002265347504f,
219
61.2M
          0.0000000000000000,
220
61.2M
          -0.3675398009862027f,
221
61.2M
          0.4921585901373873f,
222
61.2M
          0.2462710772207515f,
223
61.2M
          -0.0794670660590957f,
224
61.2M
          0.3623817333531167f,
225
61.2M
          -0.4351904965232280f,
226
61.2M
      },
227
61.2M
      {
228
61.2M
          0.2500000000000000,
229
61.2M
          -0.1014005039375376f,
230
61.2M
          0.2929100136981264f,
231
61.2M
          0.0000000000000000,
232
61.2M
          0.0000000000000000,
233
61.2M
          -0.0643507165794627f,
234
61.2M
          0.3935103426921017f,
235
61.2M
          -0.0657870154914280f,
236
61.2M
          0.0000000000000000,
237
61.2M
          -0.4082482904638628f,
238
61.2M
          -0.3078822139579090f,
239
61.2M
          -0.3852501370925192f,
240
61.2M
          -0.0857401903551931f,
241
61.2M
          -0.4613374887461511f,
242
61.2M
          0.0000000000000000,
243
61.2M
          0.2191868483885747f,
244
61.2M
      },
245
61.2M
      {
246
61.2M
          0.2500000000000000,
247
61.2M
          -0.1014005039375376f,
248
61.2M
          -0.4067007583026072f,
249
61.2M
          -0.2125574805828705f,
250
61.2M
          0.0000000000000000,
251
61.2M
          -0.0643507165794627f,
252
61.2M
          -0.4517556589999464f,
253
61.2M
          0.3046847507248840f,
254
61.2M
          0.3017929516615503f,
255
61.2M
          -0.4082482904638635f,
256
61.2M
          -0.1747866975480813f,
257
61.2M
          0.2110560104933581f,
258
61.2M
          -0.1426608480880734f,
259
61.2M
          -0.1381354035075829f,
260
61.2M
          -0.1743760259965108f,
261
61.2M
          0.1135498731499426f,
262
61.2M
      },
263
61.2M
      {
264
61.2M
          0.2500000000000000,
265
61.2M
          -0.1014005039375377f,
266
61.2M
          -0.1957439937204287f,
267
61.2M
          -0.1621205195722833f,
268
61.2M
          0.0000000000000000,
269
61.2M
          -0.0643507165794628f,
270
61.2M
          0.0074182263792444f,
271
61.2M
          0.2904801297290076f,
272
61.2M
          0.0952002265347505f,
273
61.2M
          0.0000000000000000,
274
61.2M
          0.3675398009862011f,
275
61.2M
          -0.4921585901373891f,
276
61.2M
          0.2462710772207514f,
277
61.2M
          -0.0794670660591026f,
278
61.2M
          0.3623817333531165f,
279
61.2M
          -0.4351904965232251f,
280
61.2M
      },
281
61.2M
      {
282
61.2M
          0.2500000000000000,
283
61.2M
          -0.1014005039375375f,
284
61.2M
          0.0000000000000000,
285
61.2M
          -0.4706702258572528f,
286
61.2M
          0.0000000000000000,
287
61.2M
          -0.0643507165794627f,
288
61.2M
          0.1107416575309343f,
289
61.2M
          0.0000000000000000,
290
61.2M
          -0.1627234014286617f,
291
61.2M
          0.0000000000000000,
292
61.2M
          0.0000000000000000,
293
61.2M
          0.0000000000000000,
294
61.2M
          0.1488339922711357f,
295
61.2M
          0.4972464710953509f,
296
61.2M
          0.2921026642334879f,
297
61.2M
          0.5550443808910661f,
298
61.2M
      },
299
61.2M
      {
300
61.2M
          0.2500000000000000,
301
61.2M
          -0.1014005039375377f,
302
61.2M
          0.1137907446044809f,
303
61.2M
          -0.1464291867126764f,
304
61.2M
          0.0000000000000000,
305
61.2M
          -0.0643507165794628f,
306
61.2M
          0.0829816309488205f,
307
61.2M
          -0.2388977352334460f,
308
61.2M
          -0.3531238544981630f,
309
61.2M
          -0.4082482904638630f,
310
61.2M
          0.4826689115059883f,
311
61.2M
          0.1741941265991622f,
312
61.2M
          -0.0476868035022925f,
313
61.2M
          0.1253805944856366f,
314
61.2M
          -0.4326608024727445f,
315
61.2M
          -0.2546827712406646f,
316
61.2M
      },
317
61.2M
      {
318
61.2M
          0.2500000000000000,
319
61.2M
          -0.1014005039375377f,
320
61.2M
          -0.4444481661973438f,
321
61.2M
          0.3085497062849487f,
322
61.2M
          0.0000000000000000,
323
61.2M
          -0.0643507165794628f,
324
61.2M
          0.1585450355183970f,
325
61.2M
          -0.5112616136592012f,
326
61.2M
          0.2579236279634129f,
327
61.2M
          0.0000000000000000,
328
61.2M
          -0.0812611176717504f,
329
61.2M
          -0.1856718091610990f,
330
61.2M
          -0.3416446842253373f,
331
61.2M
          0.3302282550303805f,
332
61.2M
          0.0702790691196282f,
333
61.2M
          -0.0741750459581023f,
334
61.2M
      },
335
61.2M
      {
336
61.2M
          0.2500000000000000,
337
61.2M
          -0.1014005039375376f,
338
61.2M
          -0.2929100136981264f,
339
61.2M
          0.0000000000000000,
340
61.2M
          0.0000000000000000,
341
61.2M
          -0.0643507165794627f,
342
61.2M
          0.3935103426921022f,
343
61.2M
          0.0657870154914254f,
344
61.2M
          0.0000000000000000,
345
61.2M
          0.4082482904638634f,
346
61.2M
          0.3078822139579031f,
347
61.2M
          0.3852501370925211f,
348
61.2M
          -0.0857401903551927f,
349
61.2M
          -0.4613374887461554f,
350
61.2M
          0.0000000000000000,
351
61.2M
          0.2191868483885728f,
352
61.2M
      },
353
61.2M
      {
354
61.2M
          0.2500000000000000,
355
61.2M
          -0.1014005039375376f,
356
61.2M
          -0.1137907446044814f,
357
61.2M
          -0.1464291867126654f,
358
61.2M
          0.0000000000000000,
359
61.2M
          -0.0643507165794627f,
360
61.2M
          0.0829816309488214f,
361
61.2M
          0.2388977352334547f,
362
61.2M
          -0.3531238544981624f,
363
61.2M
          0.4082482904638630f,
364
61.2M
          -0.4826689115059858f,
365
61.2M
          -0.1741941265991621f,
366
61.2M
          -0.0476868035022928f,
367
61.2M
          0.1253805944856431f,
368
61.2M
          -0.4326608024727457f,
369
61.2M
          -0.2546827712406641f,
370
61.2M
      },
371
61.2M
      {
372
61.2M
          0.2500000000000000,
373
61.2M
          -0.1014005039375374f,
374
61.2M
          0.0000000000000000,
375
61.2M
          0.4251149611657548f,
376
61.2M
          0.0000000000000000,
377
61.2M
          -0.0643507165794626f,
378
61.2M
          -0.4517556589999480f,
379
61.2M
          0.0000000000000000,
380
61.2M
          -0.6035859033230976f,
381
61.2M
          0.0000000000000000,
382
61.2M
          0.0000000000000000,
383
61.2M
          0.0000000000000000,
384
61.2M
          -0.1426608480880724f,
385
61.2M
          -0.1381354035075845f,
386
61.2M
          0.3487520519930227f,
387
61.2M
          0.1135498731499429f,
388
61.2M
      },
389
61.2M
  };
390
391
61.2M
  const HWY_CAPPED(float, 16) d;
392
183M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
122M
    auto scalar = Zero(d);
394
2.08G
    for (size_t j = 0; j < 16; j++) {
395
1.95G
      auto px = Set(d, pixels[j]);
396
1.95G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.95G
      scalar = MulAdd(px, basis, scalar);
398
1.95G
    }
399
122M
    Store(scalar, d, coeffs + i);
400
122M
  }
401
61.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
942k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
942k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
942k
      {
102
942k
          0.2500000000000000,
103
942k
          0.8769029297991420f,
104
942k
          0.0000000000000000,
105
942k
          0.0000000000000000,
106
942k
          0.0000000000000000,
107
942k
          -0.4105377591765233f,
108
942k
          0.0000000000000000,
109
942k
          0.0000000000000000,
110
942k
          0.0000000000000000,
111
942k
          0.0000000000000000,
112
942k
          0.0000000000000000,
113
942k
          0.0000000000000000,
114
942k
          0.0000000000000000,
115
942k
          0.0000000000000000,
116
942k
          0.0000000000000000,
117
942k
          0.0000000000000000,
118
942k
      },
119
942k
      {
120
942k
          0.2500000000000000,
121
942k
          0.2206518106944235f,
122
942k
          0.0000000000000000,
123
942k
          0.0000000000000000,
124
942k
          -0.7071067811865474f,
125
942k
          0.6235485373547691f,
126
942k
          0.0000000000000000,
127
942k
          0.0000000000000000,
128
942k
          0.0000000000000000,
129
942k
          0.0000000000000000,
130
942k
          0.0000000000000000,
131
942k
          0.0000000000000000,
132
942k
          0.0000000000000000,
133
942k
          0.0000000000000000,
134
942k
          0.0000000000000000,
135
942k
          0.0000000000000000,
136
942k
      },
137
942k
      {
138
942k
          0.2500000000000000,
139
942k
          -0.1014005039375376f,
140
942k
          0.4067007583026075f,
141
942k
          -0.2125574805828875f,
142
942k
          0.0000000000000000,
143
942k
          -0.0643507165794627f,
144
942k
          -0.4517556589999482f,
145
942k
          -0.3046847507248690f,
146
942k
          0.3017929516615495f,
147
942k
          0.4082482904638627f,
148
942k
          0.1747866975480809f,
149
942k
          -0.2110560104933578f,
150
942k
          -0.1426608480880726f,
151
942k
          -0.1381354035075859f,
152
942k
          -0.1743760259965107f,
153
942k
          0.1135498731499434f,
154
942k
      },
155
942k
      {
156
942k
          0.2500000000000000,
157
942k
          -0.1014005039375375f,
158
942k
          0.4444481661973445f,
159
942k
          0.3085497062849767f,
160
942k
          0.0000000000000000f,
161
942k
          -0.0643507165794627f,
162
942k
          0.1585450355184006f,
163
942k
          0.5112616136591823f,
164
942k
          0.2579236279634118f,
165
942k
          0.0000000000000000,
166
942k
          0.0812611176717539f,
167
942k
          0.1856718091610980f,
168
942k
          -0.3416446842253372f,
169
942k
          0.3302282550303788f,
170
942k
          0.0702790691196284f,
171
942k
          -0.0741750459581035f,
172
942k
      },
173
942k
      {
174
942k
          0.2500000000000000,
175
942k
          0.2206518106944236f,
176
942k
          0.0000000000000000,
177
942k
          0.0000000000000000,
178
942k
          0.7071067811865476f,
179
942k
          0.6235485373547694f,
180
942k
          0.0000000000000000,
181
942k
          0.0000000000000000,
182
942k
          0.0000000000000000,
183
942k
          0.0000000000000000,
184
942k
          0.0000000000000000,
185
942k
          0.0000000000000000,
186
942k
          0.0000000000000000,
187
942k
          0.0000000000000000,
188
942k
          0.0000000000000000,
189
942k
          0.0000000000000000,
190
942k
      },
191
942k
      {
192
942k
          0.2500000000000000,
193
942k
          -0.1014005039375378f,
194
942k
          0.0000000000000000,
195
942k
          0.4706702258572536f,
196
942k
          0.0000000000000000,
197
942k
          -0.0643507165794628f,
198
942k
          -0.0403851516082220f,
199
942k
          0.0000000000000000,
200
942k
          0.1627234014286620f,
201
942k
          0.0000000000000000,
202
942k
          0.0000000000000000,
203
942k
          0.0000000000000000,
204
942k
          0.7367497537172237f,
205
942k
          0.0875511500058708f,
206
942k
          -0.2921026642334881f,
207
942k
          0.1940289303259434f,
208
942k
      },
209
942k
      {
210
942k
          0.2500000000000000,
211
942k
          -0.1014005039375377f,
212
942k
          0.1957439937204294f,
213
942k
          -0.1621205195722993f,
214
942k
          0.0000000000000000,
215
942k
          -0.0643507165794628f,
216
942k
          0.0074182263792424f,
217
942k
          -0.2904801297289980f,
218
942k
          0.0952002265347504f,
219
942k
          0.0000000000000000,
220
942k
          -0.3675398009862027f,
221
942k
          0.4921585901373873f,
222
942k
          0.2462710772207515f,
223
942k
          -0.0794670660590957f,
224
942k
          0.3623817333531167f,
225
942k
          -0.4351904965232280f,
226
942k
      },
227
942k
      {
228
942k
          0.2500000000000000,
229
942k
          -0.1014005039375376f,
230
942k
          0.2929100136981264f,
231
942k
          0.0000000000000000,
232
942k
          0.0000000000000000,
233
942k
          -0.0643507165794627f,
234
942k
          0.3935103426921017f,
235
942k
          -0.0657870154914280f,
236
942k
          0.0000000000000000,
237
942k
          -0.4082482904638628f,
238
942k
          -0.3078822139579090f,
239
942k
          -0.3852501370925192f,
240
942k
          -0.0857401903551931f,
241
942k
          -0.4613374887461511f,
242
942k
          0.0000000000000000,
243
942k
          0.2191868483885747f,
244
942k
      },
245
942k
      {
246
942k
          0.2500000000000000,
247
942k
          -0.1014005039375376f,
248
942k
          -0.4067007583026072f,
249
942k
          -0.2125574805828705f,
250
942k
          0.0000000000000000,
251
942k
          -0.0643507165794627f,
252
942k
          -0.4517556589999464f,
253
942k
          0.3046847507248840f,
254
942k
          0.3017929516615503f,
255
942k
          -0.4082482904638635f,
256
942k
          -0.1747866975480813f,
257
942k
          0.2110560104933581f,
258
942k
          -0.1426608480880734f,
259
942k
          -0.1381354035075829f,
260
942k
          -0.1743760259965108f,
261
942k
          0.1135498731499426f,
262
942k
      },
263
942k
      {
264
942k
          0.2500000000000000,
265
942k
          -0.1014005039375377f,
266
942k
          -0.1957439937204287f,
267
942k
          -0.1621205195722833f,
268
942k
          0.0000000000000000,
269
942k
          -0.0643507165794628f,
270
942k
          0.0074182263792444f,
271
942k
          0.2904801297290076f,
272
942k
          0.0952002265347505f,
273
942k
          0.0000000000000000,
274
942k
          0.3675398009862011f,
275
942k
          -0.4921585901373891f,
276
942k
          0.2462710772207514f,
277
942k
          -0.0794670660591026f,
278
942k
          0.3623817333531165f,
279
942k
          -0.4351904965232251f,
280
942k
      },
281
942k
      {
282
942k
          0.2500000000000000,
283
942k
          -0.1014005039375375f,
284
942k
          0.0000000000000000,
285
942k
          -0.4706702258572528f,
286
942k
          0.0000000000000000,
287
942k
          -0.0643507165794627f,
288
942k
          0.1107416575309343f,
289
942k
          0.0000000000000000,
290
942k
          -0.1627234014286617f,
291
942k
          0.0000000000000000,
292
942k
          0.0000000000000000,
293
942k
          0.0000000000000000,
294
942k
          0.1488339922711357f,
295
942k
          0.4972464710953509f,
296
942k
          0.2921026642334879f,
297
942k
          0.5550443808910661f,
298
942k
      },
299
942k
      {
300
942k
          0.2500000000000000,
301
942k
          -0.1014005039375377f,
302
942k
          0.1137907446044809f,
303
942k
          -0.1464291867126764f,
304
942k
          0.0000000000000000,
305
942k
          -0.0643507165794628f,
306
942k
          0.0829816309488205f,
307
942k
          -0.2388977352334460f,
308
942k
          -0.3531238544981630f,
309
942k
          -0.4082482904638630f,
310
942k
          0.4826689115059883f,
311
942k
          0.1741941265991622f,
312
942k
          -0.0476868035022925f,
313
942k
          0.1253805944856366f,
314
942k
          -0.4326608024727445f,
315
942k
          -0.2546827712406646f,
316
942k
      },
317
942k
      {
318
942k
          0.2500000000000000,
319
942k
          -0.1014005039375377f,
320
942k
          -0.4444481661973438f,
321
942k
          0.3085497062849487f,
322
942k
          0.0000000000000000,
323
942k
          -0.0643507165794628f,
324
942k
          0.1585450355183970f,
325
942k
          -0.5112616136592012f,
326
942k
          0.2579236279634129f,
327
942k
          0.0000000000000000,
328
942k
          -0.0812611176717504f,
329
942k
          -0.1856718091610990f,
330
942k
          -0.3416446842253373f,
331
942k
          0.3302282550303805f,
332
942k
          0.0702790691196282f,
333
942k
          -0.0741750459581023f,
334
942k
      },
335
942k
      {
336
942k
          0.2500000000000000,
337
942k
          -0.1014005039375376f,
338
942k
          -0.2929100136981264f,
339
942k
          0.0000000000000000,
340
942k
          0.0000000000000000,
341
942k
          -0.0643507165794627f,
342
942k
          0.3935103426921022f,
343
942k
          0.0657870154914254f,
344
942k
          0.0000000000000000,
345
942k
          0.4082482904638634f,
346
942k
          0.3078822139579031f,
347
942k
          0.3852501370925211f,
348
942k
          -0.0857401903551927f,
349
942k
          -0.4613374887461554f,
350
942k
          0.0000000000000000,
351
942k
          0.2191868483885728f,
352
942k
      },
353
942k
      {
354
942k
          0.2500000000000000,
355
942k
          -0.1014005039375376f,
356
942k
          -0.1137907446044814f,
357
942k
          -0.1464291867126654f,
358
942k
          0.0000000000000000,
359
942k
          -0.0643507165794627f,
360
942k
          0.0829816309488214f,
361
942k
          0.2388977352334547f,
362
942k
          -0.3531238544981624f,
363
942k
          0.4082482904638630f,
364
942k
          -0.4826689115059858f,
365
942k
          -0.1741941265991621f,
366
942k
          -0.0476868035022928f,
367
942k
          0.1253805944856431f,
368
942k
          -0.4326608024727457f,
369
942k
          -0.2546827712406641f,
370
942k
      },
371
942k
      {
372
942k
          0.2500000000000000,
373
942k
          -0.1014005039375374f,
374
942k
          0.0000000000000000,
375
942k
          0.4251149611657548f,
376
942k
          0.0000000000000000,
377
942k
          -0.0643507165794626f,
378
942k
          -0.4517556589999480f,
379
942k
          0.0000000000000000,
380
942k
          -0.6035859033230976f,
381
942k
          0.0000000000000000,
382
942k
          0.0000000000000000,
383
942k
          0.0000000000000000,
384
942k
          -0.1426608480880724f,
385
942k
          -0.1381354035075845f,
386
942k
          0.3487520519930227f,
387
942k
          0.1135498731499429f,
388
942k
      },
389
942k
  };
390
391
942k
  const HWY_CAPPED(float, 16) d;
392
2.82M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.88M
    auto scalar = Zero(d);
394
32.0M
    for (size_t j = 0; j < 16; j++) {
395
30.1M
      auto px = Set(d, pixels[j]);
396
30.1M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
30.1M
      scalar = MulAdd(px, basis, scalar);
398
30.1M
    }
399
1.88M
    Store(scalar, d, coeffs + i);
400
1.88M
  }
401
942k
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
59.3M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
59.3M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
59.3M
      {
102
59.3M
          0.2500000000000000,
103
59.3M
          0.8769029297991420f,
104
59.3M
          0.0000000000000000,
105
59.3M
          0.0000000000000000,
106
59.3M
          0.0000000000000000,
107
59.3M
          -0.4105377591765233f,
108
59.3M
          0.0000000000000000,
109
59.3M
          0.0000000000000000,
110
59.3M
          0.0000000000000000,
111
59.3M
          0.0000000000000000,
112
59.3M
          0.0000000000000000,
113
59.3M
          0.0000000000000000,
114
59.3M
          0.0000000000000000,
115
59.3M
          0.0000000000000000,
116
59.3M
          0.0000000000000000,
117
59.3M
          0.0000000000000000,
118
59.3M
      },
119
59.3M
      {
120
59.3M
          0.2500000000000000,
121
59.3M
          0.2206518106944235f,
122
59.3M
          0.0000000000000000,
123
59.3M
          0.0000000000000000,
124
59.3M
          -0.7071067811865474f,
125
59.3M
          0.6235485373547691f,
126
59.3M
          0.0000000000000000,
127
59.3M
          0.0000000000000000,
128
59.3M
          0.0000000000000000,
129
59.3M
          0.0000000000000000,
130
59.3M
          0.0000000000000000,
131
59.3M
          0.0000000000000000,
132
59.3M
          0.0000000000000000,
133
59.3M
          0.0000000000000000,
134
59.3M
          0.0000000000000000,
135
59.3M
          0.0000000000000000,
136
59.3M
      },
137
59.3M
      {
138
59.3M
          0.2500000000000000,
139
59.3M
          -0.1014005039375376f,
140
59.3M
          0.4067007583026075f,
141
59.3M
          -0.2125574805828875f,
142
59.3M
          0.0000000000000000,
143
59.3M
          -0.0643507165794627f,
144
59.3M
          -0.4517556589999482f,
145
59.3M
          -0.3046847507248690f,
146
59.3M
          0.3017929516615495f,
147
59.3M
          0.4082482904638627f,
148
59.3M
          0.1747866975480809f,
149
59.3M
          -0.2110560104933578f,
150
59.3M
          -0.1426608480880726f,
151
59.3M
          -0.1381354035075859f,
152
59.3M
          -0.1743760259965107f,
153
59.3M
          0.1135498731499434f,
154
59.3M
      },
155
59.3M
      {
156
59.3M
          0.2500000000000000,
157
59.3M
          -0.1014005039375375f,
158
59.3M
          0.4444481661973445f,
159
59.3M
          0.3085497062849767f,
160
59.3M
          0.0000000000000000f,
161
59.3M
          -0.0643507165794627f,
162
59.3M
          0.1585450355184006f,
163
59.3M
          0.5112616136591823f,
164
59.3M
          0.2579236279634118f,
165
59.3M
          0.0000000000000000,
166
59.3M
          0.0812611176717539f,
167
59.3M
          0.1856718091610980f,
168
59.3M
          -0.3416446842253372f,
169
59.3M
          0.3302282550303788f,
170
59.3M
          0.0702790691196284f,
171
59.3M
          -0.0741750459581035f,
172
59.3M
      },
173
59.3M
      {
174
59.3M
          0.2500000000000000,
175
59.3M
          0.2206518106944236f,
176
59.3M
          0.0000000000000000,
177
59.3M
          0.0000000000000000,
178
59.3M
          0.7071067811865476f,
179
59.3M
          0.6235485373547694f,
180
59.3M
          0.0000000000000000,
181
59.3M
          0.0000000000000000,
182
59.3M
          0.0000000000000000,
183
59.3M
          0.0000000000000000,
184
59.3M
          0.0000000000000000,
185
59.3M
          0.0000000000000000,
186
59.3M
          0.0000000000000000,
187
59.3M
          0.0000000000000000,
188
59.3M
          0.0000000000000000,
189
59.3M
          0.0000000000000000,
190
59.3M
      },
191
59.3M
      {
192
59.3M
          0.2500000000000000,
193
59.3M
          -0.1014005039375378f,
194
59.3M
          0.0000000000000000,
195
59.3M
          0.4706702258572536f,
196
59.3M
          0.0000000000000000,
197
59.3M
          -0.0643507165794628f,
198
59.3M
          -0.0403851516082220f,
199
59.3M
          0.0000000000000000,
200
59.3M
          0.1627234014286620f,
201
59.3M
          0.0000000000000000,
202
59.3M
          0.0000000000000000,
203
59.3M
          0.0000000000000000,
204
59.3M
          0.7367497537172237f,
205
59.3M
          0.0875511500058708f,
206
59.3M
          -0.2921026642334881f,
207
59.3M
          0.1940289303259434f,
208
59.3M
      },
209
59.3M
      {
210
59.3M
          0.2500000000000000,
211
59.3M
          -0.1014005039375377f,
212
59.3M
          0.1957439937204294f,
213
59.3M
          -0.1621205195722993f,
214
59.3M
          0.0000000000000000,
215
59.3M
          -0.0643507165794628f,
216
59.3M
          0.0074182263792424f,
217
59.3M
          -0.2904801297289980f,
218
59.3M
          0.0952002265347504f,
219
59.3M
          0.0000000000000000,
220
59.3M
          -0.3675398009862027f,
221
59.3M
          0.4921585901373873f,
222
59.3M
          0.2462710772207515f,
223
59.3M
          -0.0794670660590957f,
224
59.3M
          0.3623817333531167f,
225
59.3M
          -0.4351904965232280f,
226
59.3M
      },
227
59.3M
      {
228
59.3M
          0.2500000000000000,
229
59.3M
          -0.1014005039375376f,
230
59.3M
          0.2929100136981264f,
231
59.3M
          0.0000000000000000,
232
59.3M
          0.0000000000000000,
233
59.3M
          -0.0643507165794627f,
234
59.3M
          0.3935103426921017f,
235
59.3M
          -0.0657870154914280f,
236
59.3M
          0.0000000000000000,
237
59.3M
          -0.4082482904638628f,
238
59.3M
          -0.3078822139579090f,
239
59.3M
          -0.3852501370925192f,
240
59.3M
          -0.0857401903551931f,
241
59.3M
          -0.4613374887461511f,
242
59.3M
          0.0000000000000000,
243
59.3M
          0.2191868483885747f,
244
59.3M
      },
245
59.3M
      {
246
59.3M
          0.2500000000000000,
247
59.3M
          -0.1014005039375376f,
248
59.3M
          -0.4067007583026072f,
249
59.3M
          -0.2125574805828705f,
250
59.3M
          0.0000000000000000,
251
59.3M
          -0.0643507165794627f,
252
59.3M
          -0.4517556589999464f,
253
59.3M
          0.3046847507248840f,
254
59.3M
          0.3017929516615503f,
255
59.3M
          -0.4082482904638635f,
256
59.3M
          -0.1747866975480813f,
257
59.3M
          0.2110560104933581f,
258
59.3M
          -0.1426608480880734f,
259
59.3M
          -0.1381354035075829f,
260
59.3M
          -0.1743760259965108f,
261
59.3M
          0.1135498731499426f,
262
59.3M
      },
263
59.3M
      {
264
59.3M
          0.2500000000000000,
265
59.3M
          -0.1014005039375377f,
266
59.3M
          -0.1957439937204287f,
267
59.3M
          -0.1621205195722833f,
268
59.3M
          0.0000000000000000,
269
59.3M
          -0.0643507165794628f,
270
59.3M
          0.0074182263792444f,
271
59.3M
          0.2904801297290076f,
272
59.3M
          0.0952002265347505f,
273
59.3M
          0.0000000000000000,
274
59.3M
          0.3675398009862011f,
275
59.3M
          -0.4921585901373891f,
276
59.3M
          0.2462710772207514f,
277
59.3M
          -0.0794670660591026f,
278
59.3M
          0.3623817333531165f,
279
59.3M
          -0.4351904965232251f,
280
59.3M
      },
281
59.3M
      {
282
59.3M
          0.2500000000000000,
283
59.3M
          -0.1014005039375375f,
284
59.3M
          0.0000000000000000,
285
59.3M
          -0.4706702258572528f,
286
59.3M
          0.0000000000000000,
287
59.3M
          -0.0643507165794627f,
288
59.3M
          0.1107416575309343f,
289
59.3M
          0.0000000000000000,
290
59.3M
          -0.1627234014286617f,
291
59.3M
          0.0000000000000000,
292
59.3M
          0.0000000000000000,
293
59.3M
          0.0000000000000000,
294
59.3M
          0.1488339922711357f,
295
59.3M
          0.4972464710953509f,
296
59.3M
          0.2921026642334879f,
297
59.3M
          0.5550443808910661f,
298
59.3M
      },
299
59.3M
      {
300
59.3M
          0.2500000000000000,
301
59.3M
          -0.1014005039375377f,
302
59.3M
          0.1137907446044809f,
303
59.3M
          -0.1464291867126764f,
304
59.3M
          0.0000000000000000,
305
59.3M
          -0.0643507165794628f,
306
59.3M
          0.0829816309488205f,
307
59.3M
          -0.2388977352334460f,
308
59.3M
          -0.3531238544981630f,
309
59.3M
          -0.4082482904638630f,
310
59.3M
          0.4826689115059883f,
311
59.3M
          0.1741941265991622f,
312
59.3M
          -0.0476868035022925f,
313
59.3M
          0.1253805944856366f,
314
59.3M
          -0.4326608024727445f,
315
59.3M
          -0.2546827712406646f,
316
59.3M
      },
317
59.3M
      {
318
59.3M
          0.2500000000000000,
319
59.3M
          -0.1014005039375377f,
320
59.3M
          -0.4444481661973438f,
321
59.3M
          0.3085497062849487f,
322
59.3M
          0.0000000000000000,
323
59.3M
          -0.0643507165794628f,
324
59.3M
          0.1585450355183970f,
325
59.3M
          -0.5112616136592012f,
326
59.3M
          0.2579236279634129f,
327
59.3M
          0.0000000000000000,
328
59.3M
          -0.0812611176717504f,
329
59.3M
          -0.1856718091610990f,
330
59.3M
          -0.3416446842253373f,
331
59.3M
          0.3302282550303805f,
332
59.3M
          0.0702790691196282f,
333
59.3M
          -0.0741750459581023f,
334
59.3M
      },
335
59.3M
      {
336
59.3M
          0.2500000000000000,
337
59.3M
          -0.1014005039375376f,
338
59.3M
          -0.2929100136981264f,
339
59.3M
          0.0000000000000000,
340
59.3M
          0.0000000000000000,
341
59.3M
          -0.0643507165794627f,
342
59.3M
          0.3935103426921022f,
343
59.3M
          0.0657870154914254f,
344
59.3M
          0.0000000000000000,
345
59.3M
          0.4082482904638634f,
346
59.3M
          0.3078822139579031f,
347
59.3M
          0.3852501370925211f,
348
59.3M
          -0.0857401903551927f,
349
59.3M
          -0.4613374887461554f,
350
59.3M
          0.0000000000000000,
351
59.3M
          0.2191868483885728f,
352
59.3M
      },
353
59.3M
      {
354
59.3M
          0.2500000000000000,
355
59.3M
          -0.1014005039375376f,
356
59.3M
          -0.1137907446044814f,
357
59.3M
          -0.1464291867126654f,
358
59.3M
          0.0000000000000000,
359
59.3M
          -0.0643507165794627f,
360
59.3M
          0.0829816309488214f,
361
59.3M
          0.2388977352334547f,
362
59.3M
          -0.3531238544981624f,
363
59.3M
          0.4082482904638630f,
364
59.3M
          -0.4826689115059858f,
365
59.3M
          -0.1741941265991621f,
366
59.3M
          -0.0476868035022928f,
367
59.3M
          0.1253805944856431f,
368
59.3M
          -0.4326608024727457f,
369
59.3M
          -0.2546827712406641f,
370
59.3M
      },
371
59.3M
      {
372
59.3M
          0.2500000000000000,
373
59.3M
          -0.1014005039375374f,
374
59.3M
          0.0000000000000000,
375
59.3M
          0.4251149611657548f,
376
59.3M
          0.0000000000000000,
377
59.3M
          -0.0643507165794626f,
378
59.3M
          -0.4517556589999480f,
379
59.3M
          0.0000000000000000,
380
59.3M
          -0.6035859033230976f,
381
59.3M
          0.0000000000000000,
382
59.3M
          0.0000000000000000,
383
59.3M
          0.0000000000000000,
384
59.3M
          -0.1426608480880724f,
385
59.3M
          -0.1381354035075845f,
386
59.3M
          0.3487520519930227f,
387
59.3M
          0.1135498731499429f,
388
59.3M
      },
389
59.3M
  };
390
391
59.3M
  const HWY_CAPPED(float, 16) d;
392
177M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
118M
    auto scalar = Zero(d);
394
2.01G
    for (size_t j = 0; j < 16; j++) {
395
1.89G
      auto px = Set(d, pixels[j]);
396
1.89G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.89G
      scalar = MulAdd(px, basis, scalar);
398
1.89G
    }
399
118M
    Store(scalar, d, coeffs + i);
400
118M
  }
401
59.3M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
942k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
942k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
942k
      {
102
942k
          0.2500000000000000,
103
942k
          0.8769029297991420f,
104
942k
          0.0000000000000000,
105
942k
          0.0000000000000000,
106
942k
          0.0000000000000000,
107
942k
          -0.4105377591765233f,
108
942k
          0.0000000000000000,
109
942k
          0.0000000000000000,
110
942k
          0.0000000000000000,
111
942k
          0.0000000000000000,
112
942k
          0.0000000000000000,
113
942k
          0.0000000000000000,
114
942k
          0.0000000000000000,
115
942k
          0.0000000000000000,
116
942k
          0.0000000000000000,
117
942k
          0.0000000000000000,
118
942k
      },
119
942k
      {
120
942k
          0.2500000000000000,
121
942k
          0.2206518106944235f,
122
942k
          0.0000000000000000,
123
942k
          0.0000000000000000,
124
942k
          -0.7071067811865474f,
125
942k
          0.6235485373547691f,
126
942k
          0.0000000000000000,
127
942k
          0.0000000000000000,
128
942k
          0.0000000000000000,
129
942k
          0.0000000000000000,
130
942k
          0.0000000000000000,
131
942k
          0.0000000000000000,
132
942k
          0.0000000000000000,
133
942k
          0.0000000000000000,
134
942k
          0.0000000000000000,
135
942k
          0.0000000000000000,
136
942k
      },
137
942k
      {
138
942k
          0.2500000000000000,
139
942k
          -0.1014005039375376f,
140
942k
          0.4067007583026075f,
141
942k
          -0.2125574805828875f,
142
942k
          0.0000000000000000,
143
942k
          -0.0643507165794627f,
144
942k
          -0.4517556589999482f,
145
942k
          -0.3046847507248690f,
146
942k
          0.3017929516615495f,
147
942k
          0.4082482904638627f,
148
942k
          0.1747866975480809f,
149
942k
          -0.2110560104933578f,
150
942k
          -0.1426608480880726f,
151
942k
          -0.1381354035075859f,
152
942k
          -0.1743760259965107f,
153
942k
          0.1135498731499434f,
154
942k
      },
155
942k
      {
156
942k
          0.2500000000000000,
157
942k
          -0.1014005039375375f,
158
942k
          0.4444481661973445f,
159
942k
          0.3085497062849767f,
160
942k
          0.0000000000000000f,
161
942k
          -0.0643507165794627f,
162
942k
          0.1585450355184006f,
163
942k
          0.5112616136591823f,
164
942k
          0.2579236279634118f,
165
942k
          0.0000000000000000,
166
942k
          0.0812611176717539f,
167
942k
          0.1856718091610980f,
168
942k
          -0.3416446842253372f,
169
942k
          0.3302282550303788f,
170
942k
          0.0702790691196284f,
171
942k
          -0.0741750459581035f,
172
942k
      },
173
942k
      {
174
942k
          0.2500000000000000,
175
942k
          0.2206518106944236f,
176
942k
          0.0000000000000000,
177
942k
          0.0000000000000000,
178
942k
          0.7071067811865476f,
179
942k
          0.6235485373547694f,
180
942k
          0.0000000000000000,
181
942k
          0.0000000000000000,
182
942k
          0.0000000000000000,
183
942k
          0.0000000000000000,
184
942k
          0.0000000000000000,
185
942k
          0.0000000000000000,
186
942k
          0.0000000000000000,
187
942k
          0.0000000000000000,
188
942k
          0.0000000000000000,
189
942k
          0.0000000000000000,
190
942k
      },
191
942k
      {
192
942k
          0.2500000000000000,
193
942k
          -0.1014005039375378f,
194
942k
          0.0000000000000000,
195
942k
          0.4706702258572536f,
196
942k
          0.0000000000000000,
197
942k
          -0.0643507165794628f,
198
942k
          -0.0403851516082220f,
199
942k
          0.0000000000000000,
200
942k
          0.1627234014286620f,
201
942k
          0.0000000000000000,
202
942k
          0.0000000000000000,
203
942k
          0.0000000000000000,
204
942k
          0.7367497537172237f,
205
942k
          0.0875511500058708f,
206
942k
          -0.2921026642334881f,
207
942k
          0.1940289303259434f,
208
942k
      },
209
942k
      {
210
942k
          0.2500000000000000,
211
942k
          -0.1014005039375377f,
212
942k
          0.1957439937204294f,
213
942k
          -0.1621205195722993f,
214
942k
          0.0000000000000000,
215
942k
          -0.0643507165794628f,
216
942k
          0.0074182263792424f,
217
942k
          -0.2904801297289980f,
218
942k
          0.0952002265347504f,
219
942k
          0.0000000000000000,
220
942k
          -0.3675398009862027f,
221
942k
          0.4921585901373873f,
222
942k
          0.2462710772207515f,
223
942k
          -0.0794670660590957f,
224
942k
          0.3623817333531167f,
225
942k
          -0.4351904965232280f,
226
942k
      },
227
942k
      {
228
942k
          0.2500000000000000,
229
942k
          -0.1014005039375376f,
230
942k
          0.2929100136981264f,
231
942k
          0.0000000000000000,
232
942k
          0.0000000000000000,
233
942k
          -0.0643507165794627f,
234
942k
          0.3935103426921017f,
235
942k
          -0.0657870154914280f,
236
942k
          0.0000000000000000,
237
942k
          -0.4082482904638628f,
238
942k
          -0.3078822139579090f,
239
942k
          -0.3852501370925192f,
240
942k
          -0.0857401903551931f,
241
942k
          -0.4613374887461511f,
242
942k
          0.0000000000000000,
243
942k
          0.2191868483885747f,
244
942k
      },
245
942k
      {
246
942k
          0.2500000000000000,
247
942k
          -0.1014005039375376f,
248
942k
          -0.4067007583026072f,
249
942k
          -0.2125574805828705f,
250
942k
          0.0000000000000000,
251
942k
          -0.0643507165794627f,
252
942k
          -0.4517556589999464f,
253
942k
          0.3046847507248840f,
254
942k
          0.3017929516615503f,
255
942k
          -0.4082482904638635f,
256
942k
          -0.1747866975480813f,
257
942k
          0.2110560104933581f,
258
942k
          -0.1426608480880734f,
259
942k
          -0.1381354035075829f,
260
942k
          -0.1743760259965108f,
261
942k
          0.1135498731499426f,
262
942k
      },
263
942k
      {
264
942k
          0.2500000000000000,
265
942k
          -0.1014005039375377f,
266
942k
          -0.1957439937204287f,
267
942k
          -0.1621205195722833f,
268
942k
          0.0000000000000000,
269
942k
          -0.0643507165794628f,
270
942k
          0.0074182263792444f,
271
942k
          0.2904801297290076f,
272
942k
          0.0952002265347505f,
273
942k
          0.0000000000000000,
274
942k
          0.3675398009862011f,
275
942k
          -0.4921585901373891f,
276
942k
          0.2462710772207514f,
277
942k
          -0.0794670660591026f,
278
942k
          0.3623817333531165f,
279
942k
          -0.4351904965232251f,
280
942k
      },
281
942k
      {
282
942k
          0.2500000000000000,
283
942k
          -0.1014005039375375f,
284
942k
          0.0000000000000000,
285
942k
          -0.4706702258572528f,
286
942k
          0.0000000000000000,
287
942k
          -0.0643507165794627f,
288
942k
          0.1107416575309343f,
289
942k
          0.0000000000000000,
290
942k
          -0.1627234014286617f,
291
942k
          0.0000000000000000,
292
942k
          0.0000000000000000,
293
942k
          0.0000000000000000,
294
942k
          0.1488339922711357f,
295
942k
          0.4972464710953509f,
296
942k
          0.2921026642334879f,
297
942k
          0.5550443808910661f,
298
942k
      },
299
942k
      {
300
942k
          0.2500000000000000,
301
942k
          -0.1014005039375377f,
302
942k
          0.1137907446044809f,
303
942k
          -0.1464291867126764f,
304
942k
          0.0000000000000000,
305
942k
          -0.0643507165794628f,
306
942k
          0.0829816309488205f,
307
942k
          -0.2388977352334460f,
308
942k
          -0.3531238544981630f,
309
942k
          -0.4082482904638630f,
310
942k
          0.4826689115059883f,
311
942k
          0.1741941265991622f,
312
942k
          -0.0476868035022925f,
313
942k
          0.1253805944856366f,
314
942k
          -0.4326608024727445f,
315
942k
          -0.2546827712406646f,
316
942k
      },
317
942k
      {
318
942k
          0.2500000000000000,
319
942k
          -0.1014005039375377f,
320
942k
          -0.4444481661973438f,
321
942k
          0.3085497062849487f,
322
942k
          0.0000000000000000,
323
942k
          -0.0643507165794628f,
324
942k
          0.1585450355183970f,
325
942k
          -0.5112616136592012f,
326
942k
          0.2579236279634129f,
327
942k
          0.0000000000000000,
328
942k
          -0.0812611176717504f,
329
942k
          -0.1856718091610990f,
330
942k
          -0.3416446842253373f,
331
942k
          0.3302282550303805f,
332
942k
          0.0702790691196282f,
333
942k
          -0.0741750459581023f,
334
942k
      },
335
942k
      {
336
942k
          0.2500000000000000,
337
942k
          -0.1014005039375376f,
338
942k
          -0.2929100136981264f,
339
942k
          0.0000000000000000,
340
942k
          0.0000000000000000,
341
942k
          -0.0643507165794627f,
342
942k
          0.3935103426921022f,
343
942k
          0.0657870154914254f,
344
942k
          0.0000000000000000,
345
942k
          0.4082482904638634f,
346
942k
          0.3078822139579031f,
347
942k
          0.3852501370925211f,
348
942k
          -0.0857401903551927f,
349
942k
          -0.4613374887461554f,
350
942k
          0.0000000000000000,
351
942k
          0.2191868483885728f,
352
942k
      },
353
942k
      {
354
942k
          0.2500000000000000,
355
942k
          -0.1014005039375376f,
356
942k
          -0.1137907446044814f,
357
942k
          -0.1464291867126654f,
358
942k
          0.0000000000000000,
359
942k
          -0.0643507165794627f,
360
942k
          0.0829816309488214f,
361
942k
          0.2388977352334547f,
362
942k
          -0.3531238544981624f,
363
942k
          0.4082482904638630f,
364
942k
          -0.4826689115059858f,
365
942k
          -0.1741941265991621f,
366
942k
          -0.0476868035022928f,
367
942k
          0.1253805944856431f,
368
942k
          -0.4326608024727457f,
369
942k
          -0.2546827712406641f,
370
942k
      },
371
942k
      {
372
942k
          0.2500000000000000,
373
942k
          -0.1014005039375374f,
374
942k
          0.0000000000000000,
375
942k
          0.4251149611657548f,
376
942k
          0.0000000000000000,
377
942k
          -0.0643507165794626f,
378
942k
          -0.4517556589999480f,
379
942k
          0.0000000000000000,
380
942k
          -0.6035859033230976f,
381
942k
          0.0000000000000000,
382
942k
          0.0000000000000000,
383
942k
          0.0000000000000000,
384
942k
          -0.1426608480880724f,
385
942k
          -0.1381354035075845f,
386
942k
          0.3487520519930227f,
387
942k
          0.1135498731499429f,
388
942k
      },
389
942k
  };
390
391
942k
  const HWY_CAPPED(float, 16) d;
392
2.82M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.88M
    auto scalar = Zero(d);
394
32.0M
    for (size_t j = 0; j < 16; j++) {
395
30.1M
      auto px = Set(d, pixels[j]);
396
30.1M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
30.1M
      scalar = MulAdd(px, basis, scalar);
398
30.1M
    }
399
1.88M
    Store(scalar, d, coeffs + i);
400
1.88M
  }
401
942k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
61.2M
                            float* JXL_RESTRICT coefficients) {
411
61.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
61.2M
  size_t afv_x = afv_kind & 1;
413
61.2M
  size_t afv_y = afv_kind / 2;
414
61.2M
  HWY_ALIGN float block[4 * 8] = {};
415
306M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.22G
    for (size_t ix = 0; ix < 4; ix++) {
417
979M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
979M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
979M
    }
420
244M
  }
421
  // AFV coefficients in (even, even) positions.
422
61.2M
  HWY_ALIGN float coeff[4 * 4];
423
61.2M
  AFVDCT4x4(block, coeff);
424
306M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.22G
    for (size_t ix = 0; ix < 4; ix++) {
426
979M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
979M
    }
428
244M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
61.2M
  ComputeScaledDCT<4, 4>()(
431
61.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
61.2M
              pixels_stride),
433
61.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
306M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.20G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.95G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.95G
    }
439
244M
  }
440
  // 4x8 DCT of the other half of the block.
441
61.2M
  ComputeScaledDCT<4, 8>()(
442
61.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
61.2M
      block, scratch_space);
444
306M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.20G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.95G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.95G
    }
448
244M
  }
449
61.2M
  float block00 = coefficients[0] * 0.25f;
450
61.2M
  float block01 = coefficients[1];
451
61.2M
  float block10 = coefficients[8];
452
61.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
61.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
61.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
61.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
232k
                            float* JXL_RESTRICT coefficients) {
411
232k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
232k
  size_t afv_x = afv_kind & 1;
413
232k
  size_t afv_y = afv_kind / 2;
414
232k
  HWY_ALIGN float block[4 * 8] = {};
415
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.64M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.71M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.71M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.71M
    }
420
928k
  }
421
  // AFV coefficients in (even, even) positions.
422
232k
  HWY_ALIGN float coeff[4 * 4];
423
232k
  AFVDCT4x4(block, coeff);
424
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.64M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.71M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.71M
    }
428
928k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
232k
  ComputeScaledDCT<4, 4>()(
431
232k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
232k
              pixels_stride),
433
232k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.36M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.43M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.43M
    }
439
928k
  }
440
  // 4x8 DCT of the other half of the block.
441
232k
  ComputeScaledDCT<4, 8>()(
442
232k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
232k
      block, scratch_space);
444
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.36M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.43M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.43M
    }
448
928k
  }
449
232k
  float block00 = coefficients[0] * 0.25f;
450
232k
  float block01 = coefficients[1];
451
232k
  float block10 = coefficients[8];
452
232k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
232k
  coefficients[1] = (block00 - block01) * 0.5f;
454
232k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
232k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
244k
                            float* JXL_RESTRICT coefficients) {
411
244k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
244k
  size_t afv_x = afv_kind & 1;
413
244k
  size_t afv_y = afv_kind / 2;
414
244k
  HWY_ALIGN float block[4 * 8] = {};
415
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.88M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.90M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.90M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.90M
    }
420
976k
  }
421
  // AFV coefficients in (even, even) positions.
422
244k
  HWY_ALIGN float coeff[4 * 4];
423
244k
  AFVDCT4x4(block, coeff);
424
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.88M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.90M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.90M
    }
428
976k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
244k
  ComputeScaledDCT<4, 4>()(
431
244k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
244k
              pixels_stride),
433
244k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.78M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.80M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.80M
    }
439
976k
  }
440
  // 4x8 DCT of the other half of the block.
441
244k
  ComputeScaledDCT<4, 8>()(
442
244k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
244k
      block, scratch_space);
444
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.78M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.80M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.80M
    }
448
976k
  }
449
244k
  float block00 = coefficients[0] * 0.25f;
450
244k
  float block01 = coefficients[1];
451
244k
  float block10 = coefficients[8];
452
244k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
244k
  coefficients[1] = (block00 - block01) * 0.5f;
454
244k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
244k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
220k
                            float* JXL_RESTRICT coefficients) {
411
220k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
220k
  size_t afv_x = afv_kind & 1;
413
220k
  size_t afv_y = afv_kind / 2;
414
220k
  HWY_ALIGN float block[4 * 8] = {};
415
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.41M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.52M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.52M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.52M
    }
420
882k
  }
421
  // AFV coefficients in (even, even) positions.
422
220k
  HWY_ALIGN float coeff[4 * 4];
423
220k
  AFVDCT4x4(block, coeff);
424
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.41M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.52M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.52M
    }
428
882k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
220k
  ComputeScaledDCT<4, 4>()(
431
220k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
220k
              pixels_stride),
433
220k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.94M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.05M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.05M
    }
439
882k
  }
440
  // 4x8 DCT of the other half of the block.
441
220k
  ComputeScaledDCT<4, 8>()(
442
220k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
220k
      block, scratch_space);
444
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.94M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.05M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.05M
    }
448
882k
  }
449
220k
  float block00 = coefficients[0] * 0.25f;
450
220k
  float block01 = coefficients[1];
451
220k
  float block10 = coefficients[8];
452
220k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
220k
  coefficients[1] = (block00 - block01) * 0.5f;
454
220k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
220k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
245k
                            float* JXL_RESTRICT coefficients) {
411
245k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
245k
  size_t afv_x = afv_kind & 1;
413
245k
  size_t afv_y = afv_kind / 2;
414
245k
  HWY_ALIGN float block[4 * 8] = {};
415
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.90M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.92M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.92M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.92M
    }
420
981k
  }
421
  // AFV coefficients in (even, even) positions.
422
245k
  HWY_ALIGN float coeff[4 * 4];
423
245k
  AFVDCT4x4(block, coeff);
424
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.90M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.92M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.92M
    }
428
981k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
245k
  ComputeScaledDCT<4, 4>()(
431
245k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
245k
              pixels_stride),
433
245k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.83M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.85M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.85M
    }
439
981k
  }
440
  // 4x8 DCT of the other half of the block.
441
245k
  ComputeScaledDCT<4, 8>()(
442
245k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
245k
      block, scratch_space);
444
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.83M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.85M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.85M
    }
448
981k
  }
449
245k
  float block00 = coefficients[0] * 0.25f;
450
245k
  float block01 = coefficients[1];
451
245k
  float block10 = coefficients[8];
452
245k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
245k
  coefficients[1] = (block00 - block01) * 0.5f;
454
245k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
245k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.8M
                            float* JXL_RESTRICT coefficients) {
411
14.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.8M
  size_t afv_x = afv_kind & 1;
413
14.8M
  size_t afv_y = afv_kind / 2;
414
14.8M
  HWY_ALIGN float block[4 * 8] = {};
415
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
296M
    for (size_t ix = 0; ix < 4; ix++) {
417
237M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
237M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
237M
    }
420
59.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.8M
  HWY_ALIGN float coeff[4 * 4];
423
14.8M
  AFVDCT4x4(block, coeff);
424
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
296M
    for (size_t ix = 0; ix < 4; ix++) {
426
237M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
237M
    }
428
59.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.8M
  ComputeScaledDCT<4, 4>()(
431
14.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.8M
              pixels_stride),
433
14.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
533M
    for (size_t ix = 0; ix < 8; ix++) {
437
474M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
474M
    }
439
59.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.8M
  ComputeScaledDCT<4, 8>()(
442
14.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.8M
      block, scratch_space);
444
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
533M
    for (size_t ix = 0; ix < 8; ix++) {
446
474M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
474M
    }
448
59.3M
  }
449
14.8M
  float block00 = coefficients[0] * 0.25f;
450
14.8M
  float block01 = coefficients[1];
451
14.8M
  float block10 = coefficients[8];
452
14.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.8M
                            float* JXL_RESTRICT coefficients) {
411
14.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.8M
  size_t afv_x = afv_kind & 1;
413
14.8M
  size_t afv_y = afv_kind / 2;
414
14.8M
  HWY_ALIGN float block[4 * 8] = {};
415
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
296M
    for (size_t ix = 0; ix < 4; ix++) {
417
237M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
237M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
237M
    }
420
59.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.8M
  HWY_ALIGN float coeff[4 * 4];
423
14.8M
  AFVDCT4x4(block, coeff);
424
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
296M
    for (size_t ix = 0; ix < 4; ix++) {
426
237M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
237M
    }
428
59.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.8M
  ComputeScaledDCT<4, 4>()(
431
14.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.8M
              pixels_stride),
433
14.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
533M
    for (size_t ix = 0; ix < 8; ix++) {
437
474M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
474M
    }
439
59.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.8M
  ComputeScaledDCT<4, 8>()(
442
14.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.8M
      block, scratch_space);
444
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
533M
    for (size_t ix = 0; ix < 8; ix++) {
446
474M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
474M
    }
448
59.3M
  }
449
14.8M
  float block00 = coefficients[0] * 0.25f;
450
14.8M
  float block01 = coefficients[1];
451
14.8M
  float block10 = coefficients[8];
452
14.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.8M
                            float* JXL_RESTRICT coefficients) {
411
14.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.8M
  size_t afv_x = afv_kind & 1;
413
14.8M
  size_t afv_y = afv_kind / 2;
414
14.8M
  HWY_ALIGN float block[4 * 8] = {};
415
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
296M
    for (size_t ix = 0; ix < 4; ix++) {
417
237M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
237M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
237M
    }
420
59.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.8M
  HWY_ALIGN float coeff[4 * 4];
423
14.8M
  AFVDCT4x4(block, coeff);
424
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
296M
    for (size_t ix = 0; ix < 4; ix++) {
426
237M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
237M
    }
428
59.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.8M
  ComputeScaledDCT<4, 4>()(
431
14.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.8M
              pixels_stride),
433
14.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
533M
    for (size_t ix = 0; ix < 8; ix++) {
437
474M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
474M
    }
439
59.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.8M
  ComputeScaledDCT<4, 8>()(
442
14.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.8M
      block, scratch_space);
444
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
533M
    for (size_t ix = 0; ix < 8; ix++) {
446
474M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
474M
    }
448
59.3M
  }
449
14.8M
  float block00 = coefficients[0] * 0.25f;
450
14.8M
  float block01 = coefficients[1];
451
14.8M
  float block10 = coefficients[8];
452
14.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.8M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
14.8M
                            float* JXL_RESTRICT coefficients) {
411
14.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
14.8M
  size_t afv_x = afv_kind & 1;
413
14.8M
  size_t afv_y = afv_kind / 2;
414
14.8M
  HWY_ALIGN float block[4 * 8] = {};
415
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
296M
    for (size_t ix = 0; ix < 4; ix++) {
417
237M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
237M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
237M
    }
420
59.3M
  }
421
  // AFV coefficients in (even, even) positions.
422
14.8M
  HWY_ALIGN float coeff[4 * 4];
423
14.8M
  AFVDCT4x4(block, coeff);
424
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
296M
    for (size_t ix = 0; ix < 4; ix++) {
426
237M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
237M
    }
428
59.3M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
14.8M
  ComputeScaledDCT<4, 4>()(
431
14.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
14.8M
              pixels_stride),
433
14.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
533M
    for (size_t ix = 0; ix < 8; ix++) {
437
474M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
474M
    }
439
59.3M
  }
440
  // 4x8 DCT of the other half of the block.
441
14.8M
  ComputeScaledDCT<4, 8>()(
442
14.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
14.8M
      block, scratch_space);
444
74.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
533M
    for (size_t ix = 0; ix < 8; ix++) {
446
474M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
474M
    }
448
59.3M
  }
449
14.8M
  float block00 = coefficients[0] * 0.25f;
450
14.8M
  float block01 = coefficients[1];
451
14.8M
  float block10 = coefficients[8];
452
14.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
14.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
14.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
14.8M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
232k
                            float* JXL_RESTRICT coefficients) {
411
232k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
232k
  size_t afv_x = afv_kind & 1;
413
232k
  size_t afv_y = afv_kind / 2;
414
232k
  HWY_ALIGN float block[4 * 8] = {};
415
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.64M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.71M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.71M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.71M
    }
420
928k
  }
421
  // AFV coefficients in (even, even) positions.
422
232k
  HWY_ALIGN float coeff[4 * 4];
423
232k
  AFVDCT4x4(block, coeff);
424
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.64M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.71M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.71M
    }
428
928k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
232k
  ComputeScaledDCT<4, 4>()(
431
232k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
232k
              pixels_stride),
433
232k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.36M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.43M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.43M
    }
439
928k
  }
440
  // 4x8 DCT of the other half of the block.
441
232k
  ComputeScaledDCT<4, 8>()(
442
232k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
232k
      block, scratch_space);
444
1.16M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.36M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.43M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.43M
    }
448
928k
  }
449
232k
  float block00 = coefficients[0] * 0.25f;
450
232k
  float block01 = coefficients[1];
451
232k
  float block10 = coefficients[8];
452
232k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
232k
  coefficients[1] = (block00 - block01) * 0.5f;
454
232k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
232k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
244k
                            float* JXL_RESTRICT coefficients) {
411
244k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
244k
  size_t afv_x = afv_kind & 1;
413
244k
  size_t afv_y = afv_kind / 2;
414
244k
  HWY_ALIGN float block[4 * 8] = {};
415
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.88M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.90M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.90M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.90M
    }
420
976k
  }
421
  // AFV coefficients in (even, even) positions.
422
244k
  HWY_ALIGN float coeff[4 * 4];
423
244k
  AFVDCT4x4(block, coeff);
424
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.88M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.90M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.90M
    }
428
976k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
244k
  ComputeScaledDCT<4, 4>()(
431
244k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
244k
              pixels_stride),
433
244k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.78M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.80M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.80M
    }
439
976k
  }
440
  // 4x8 DCT of the other half of the block.
441
244k
  ComputeScaledDCT<4, 8>()(
442
244k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
244k
      block, scratch_space);
444
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.78M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.80M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.80M
    }
448
976k
  }
449
244k
  float block00 = coefficients[0] * 0.25f;
450
244k
  float block01 = coefficients[1];
451
244k
  float block10 = coefficients[8];
452
244k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
244k
  coefficients[1] = (block00 - block01) * 0.5f;
454
244k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
244k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
220k
                            float* JXL_RESTRICT coefficients) {
411
220k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
220k
  size_t afv_x = afv_kind & 1;
413
220k
  size_t afv_y = afv_kind / 2;
414
220k
  HWY_ALIGN float block[4 * 8] = {};
415
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.41M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.52M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.52M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.52M
    }
420
882k
  }
421
  // AFV coefficients in (even, even) positions.
422
220k
  HWY_ALIGN float coeff[4 * 4];
423
220k
  AFVDCT4x4(block, coeff);
424
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.41M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.52M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.52M
    }
428
882k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
220k
  ComputeScaledDCT<4, 4>()(
431
220k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
220k
              pixels_stride),
433
220k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.94M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.05M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.05M
    }
439
882k
  }
440
  // 4x8 DCT of the other half of the block.
441
220k
  ComputeScaledDCT<4, 8>()(
442
220k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
220k
      block, scratch_space);
444
1.10M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.94M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.05M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.05M
    }
448
882k
  }
449
220k
  float block00 = coefficients[0] * 0.25f;
450
220k
  float block01 = coefficients[1];
451
220k
  float block10 = coefficients[8];
452
220k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
220k
  coefficients[1] = (block00 - block01) * 0.5f;
454
220k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
220k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
245k
                            float* JXL_RESTRICT coefficients) {
411
245k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
245k
  size_t afv_x = afv_kind & 1;
413
245k
  size_t afv_y = afv_kind / 2;
414
245k
  HWY_ALIGN float block[4 * 8] = {};
415
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.90M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.92M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.92M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.92M
    }
420
981k
  }
421
  // AFV coefficients in (even, even) positions.
422
245k
  HWY_ALIGN float coeff[4 * 4];
423
245k
  AFVDCT4x4(block, coeff);
424
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.90M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.92M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.92M
    }
428
981k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
245k
  ComputeScaledDCT<4, 4>()(
431
245k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
245k
              pixels_stride),
433
245k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.83M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.85M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.85M
    }
439
981k
  }
440
  // 4x8 DCT of the other half of the block.
441
245k
  ComputeScaledDCT<4, 8>()(
442
245k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
245k
      block, scratch_space);
444
1.22M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.83M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.85M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.85M
    }
448
981k
  }
449
245k
  float block00 = coefficients[0] * 0.25f;
450
245k
  float block01 = coefficients[1];
451
245k
  float block10 = coefficients[8];
452
245k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
245k
  coefficients[1] = (block00 - block01) * 0.5f;
454
245k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
245k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
215M
                                          float* JXL_RESTRICT scratch_space) {
462
215M
  using Type = AcStrategyType;
463
215M
  switch (strategy) {
464
16.9M
    case Type::IDENTITY: {
465
50.8M
      for (size_t y = 0; y < 2; y++) {
466
101M
        for (size_t x = 0; x < 2; x++) {
467
67.7M
          float block_dc = 0;
468
338M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.35G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.08G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.08G
            }
472
270M
          }
473
67.7M
          block_dc *= 1.0f / 16;
474
338M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.35G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.08G
              if (ix == 1 && iy == 1) continue;
477
1.01G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.01G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.01G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.01G
            }
481
270M
          }
482
67.7M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
67.7M
          coefficients[y * 8 + x] = block_dc;
484
67.7M
        }
485
33.8M
      }
486
16.9M
      float block00 = coefficients[0];
487
16.9M
      float block01 = coefficients[1];
488
16.9M
      float block10 = coefficients[8];
489
16.9M
      float block11 = coefficients[9];
490
16.9M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
16.9M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
16.9M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
16.9M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
16.9M
      break;
495
0
    }
496
15.7M
    case Type::DCT8X4: {
497
47.1M
      for (size_t x = 0; x < 2; x++) {
498
31.4M
        HWY_ALIGN float block[4 * 8];
499
31.4M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
31.4M
                                 scratch_space);
501
157M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.13G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.00G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.00G
          }
506
125M
        }
507
31.4M
      }
508
15.7M
      float block0 = coefficients[0];
509
15.7M
      float block1 = coefficients[8];
510
15.7M
      coefficients[0] = (block0 + block1) * 0.5f;
511
15.7M
      coefficients[8] = (block0 - block1) * 0.5f;
512
15.7M
      break;
513
0
    }
514
15.3M
    case Type::DCT4X8: {
515
46.1M
      for (size_t y = 0; y < 2; y++) {
516
30.7M
        HWY_ALIGN float block[4 * 8];
517
30.7M
        ComputeScaledDCT<4, 8>()(
518
30.7M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
30.7M
            scratch_space);
520
153M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.10G
          for (size_t ix = 0; ix < 8; ix++) {
522
985M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
985M
          }
524
123M
        }
525
30.7M
      }
526
15.3M
      float block0 = coefficients[0];
527
15.3M
      float block1 = coefficients[8];
528
15.3M
      coefficients[0] = (block0 + block1) * 0.5f;
529
15.3M
      coefficients[8] = (block0 - block1) * 0.5f;
530
15.3M
      break;
531
0
    }
532
14.8M
    case Type::DCT4X4: {
533
44.5M
      for (size_t y = 0; y < 2; y++) {
534
89.0M
        for (size_t x = 0; x < 2; x++) {
535
59.3M
          HWY_ALIGN float block[4 * 4];
536
59.3M
          ComputeScaledDCT<4, 4>()(
537
59.3M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
59.3M
              block, scratch_space);
539
296M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.18G
            for (size_t ix = 0; ix < 4; ix++) {
541
949M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
949M
            }
543
237M
          }
544
59.3M
        }
545
29.6M
      }
546
14.8M
      float block00 = coefficients[0];
547
14.8M
      float block01 = coefficients[1];
548
14.8M
      float block10 = coefficients[8];
549
14.8M
      float block11 = coefficients[9];
550
14.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
14.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
14.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
14.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
14.8M
      break;
555
0
    }
556
16.8M
    case Type::DCT2X2: {
557
16.8M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
16.8M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
16.8M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
16.8M
      break;
561
0
    }
562
6.51M
    case Type::DCT16X16: {
563
6.51M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.51M
                                 scratch_space);
565
6.51M
      break;
566
0
    }
567
12.6M
    case Type::DCT16X8: {
568
12.6M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
12.6M
                                scratch_space);
570
12.6M
      break;
571
0
    }
572
12.8M
    case Type::DCT8X16: {
573
12.8M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
12.8M
                                scratch_space);
575
12.8M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.56M
    case Type::DCT32X16: {
588
2.56M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.56M
                                 scratch_space);
590
2.56M
      break;
591
0
    }
592
2.67M
    case Type::DCT16X32: {
593
2.67M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.67M
                                 scratch_space);
595
2.67M
      break;
596
0
    }
597
1.49M
    case Type::DCT32X32: {
598
1.49M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.49M
                                 scratch_space);
600
1.49M
      break;
601
0
    }
602
34.7M
    case Type::DCT: {
603
34.7M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
34.7M
                               scratch_space);
605
34.7M
      break;
606
0
    }
607
15.2M
    case Type::AFV0: {
608
15.2M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
15.2M
      break;
610
0
    }
611
15.3M
    case Type::AFV1: {
612
15.3M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
15.3M
      break;
614
0
    }
615
15.2M
    case Type::AFV2: {
616
15.2M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
15.2M
      break;
618
0
    }
619
15.3M
    case Type::AFV3: {
620
15.3M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
15.3M
      break;
622
0
    }
623
273k
    case Type::DCT64X64: {
624
273k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
273k
                                 scratch_space);
626
273k
      break;
627
0
    }
628
767k
    case Type::DCT64X32: {
629
767k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
767k
                                 scratch_space);
631
767k
      break;
632
0
    }
633
535k
    case Type::DCT32X64: {
634
535k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
535k
                                 scratch_space);
636
535k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
215M
  }
669
215M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
7.50M
                                          float* JXL_RESTRICT scratch_space) {
462
7.50M
  using Type = AcStrategyType;
463
7.50M
  switch (strategy) {
464
1.05M
    case Type::IDENTITY: {
465
3.15M
      for (size_t y = 0; y < 2; y++) {
466
6.31M
        for (size_t x = 0; x < 2; x++) {
467
4.20M
          float block_dc = 0;
468
21.0M
          for (size_t iy = 0; iy < 4; iy++) {
469
84.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
67.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
67.3M
            }
472
16.8M
          }
473
4.20M
          block_dc *= 1.0f / 16;
474
21.0M
          for (size_t iy = 0; iy < 4; iy++) {
475
84.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
67.3M
              if (ix == 1 && iy == 1) continue;
477
63.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
63.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
63.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
63.1M
            }
481
16.8M
          }
482
4.20M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.20M
          coefficients[y * 8 + x] = block_dc;
484
4.20M
        }
485
2.10M
      }
486
1.05M
      float block00 = coefficients[0];
487
1.05M
      float block01 = coefficients[1];
488
1.05M
      float block10 = coefficients[8];
489
1.05M
      float block11 = coefficients[9];
490
1.05M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.05M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.05M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.05M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.05M
      break;
495
0
    }
496
447k
    case Type::DCT8X4: {
497
1.34M
      for (size_t x = 0; x < 2; x++) {
498
894k
        HWY_ALIGN float block[4 * 8];
499
894k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
894k
                                 scratch_space);
501
4.47M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.1M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.6M
          }
506
3.57M
        }
507
894k
      }
508
447k
      float block0 = coefficients[0];
509
447k
      float block1 = coefficients[8];
510
447k
      coefficients[0] = (block0 + block1) * 0.5f;
511
447k
      coefficients[8] = (block0 - block1) * 0.5f;
512
447k
      break;
513
0
    }
514
281k
    case Type::DCT4X8: {
515
844k
      for (size_t y = 0; y < 2; y++) {
516
563k
        HWY_ALIGN float block[4 * 8];
517
563k
        ComputeScaledDCT<4, 8>()(
518
563k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
563k
            scratch_space);
520
2.81M
        for (size_t iy = 0; iy < 4; iy++) {
521
20.2M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.0M
          }
524
2.25M
        }
525
563k
      }
526
281k
      float block0 = coefficients[0];
527
281k
      float block1 = coefficients[8];
528
281k
      coefficients[0] = (block0 + block1) * 0.5f;
529
281k
      coefficients[8] = (block0 - block1) * 0.5f;
530
281k
      break;
531
0
    }
532
2.32k
    case Type::DCT4X4: {
533
6.96k
      for (size_t y = 0; y < 2; y++) {
534
13.9k
        for (size_t x = 0; x < 2; x++) {
535
9.28k
          HWY_ALIGN float block[4 * 4];
536
9.28k
          ComputeScaledDCT<4, 4>()(
537
9.28k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.28k
              block, scratch_space);
539
46.4k
          for (size_t iy = 0; iy < 4; iy++) {
540
185k
            for (size_t ix = 0; ix < 4; ix++) {
541
148k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
148k
            }
543
37.1k
          }
544
9.28k
        }
545
4.64k
      }
546
2.32k
      float block00 = coefficients[0];
547
2.32k
      float block01 = coefficients[1];
548
2.32k
      float block10 = coefficients[8];
549
2.32k
      float block11 = coefficients[9];
550
2.32k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.32k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.32k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.32k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.32k
      break;
555
0
    }
556
1.00M
    case Type::DCT2X2: {
557
1.00M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.00M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.00M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.00M
      break;
561
0
    }
562
178k
    case Type::DCT16X16: {
563
178k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
178k
                                 scratch_space);
565
178k
      break;
566
0
    }
567
297k
    case Type::DCT16X8: {
568
297k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
297k
                                scratch_space);
570
297k
      break;
571
0
    }
572
378k
    case Type::DCT8X16: {
573
378k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
378k
                                scratch_space);
575
378k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
80.1k
    case Type::DCT32X16: {
588
80.1k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
80.1k
                                 scratch_space);
590
80.1k
      break;
591
0
    }
592
123k
    case Type::DCT16X32: {
593
123k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
123k
                                 scratch_space);
595
123k
      break;
596
0
    }
597
130k
    case Type::DCT32X32: {
598
130k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
130k
                                 scratch_space);
600
130k
      break;
601
0
    }
602
2.53M
    case Type::DCT: {
603
2.53M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
2.53M
                               scratch_space);
605
2.53M
      break;
606
0
    }
607
232k
    case Type::AFV0: {
608
232k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
232k
      break;
610
0
    }
611
244k
    case Type::AFV1: {
612
244k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
244k
      break;
614
0
    }
615
220k
    case Type::AFV2: {
616
220k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
220k
      break;
618
0
    }
619
245k
    case Type::AFV3: {
620
245k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
245k
      break;
622
0
    }
623
34.9k
    case Type::DCT64X64: {
624
34.9k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
34.9k
                                 scratch_space);
626
34.9k
      break;
627
0
    }
628
6.05k
    case Type::DCT64X32: {
629
6.05k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.05k
                                 scratch_space);
631
6.05k
      break;
632
0
    }
633
10.9k
    case Type::DCT32X64: {
634
10.9k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.9k
                                 scratch_space);
636
10.9k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
7.50M
  }
669
7.50M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
186M
                                          float* JXL_RESTRICT scratch_space) {
462
186M
  using Type = AcStrategyType;
463
186M
  switch (strategy) {
464
14.8M
    case Type::IDENTITY: {
465
44.4M
      for (size_t y = 0; y < 2; y++) {
466
88.9M
        for (size_t x = 0; x < 2; x++) {
467
59.3M
          float block_dc = 0;
468
296M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.18G
            for (size_t ix = 0; ix < 4; ix++) {
470
949M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
949M
            }
472
237M
          }
473
59.3M
          block_dc *= 1.0f / 16;
474
296M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.18G
            for (size_t ix = 0; ix < 4; ix++) {
476
949M
              if (ix == 1 && iy == 1) continue;
477
889M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
889M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
889M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
889M
            }
481
237M
          }
482
59.3M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
59.3M
          coefficients[y * 8 + x] = block_dc;
484
59.3M
        }
485
29.6M
      }
486
14.8M
      float block00 = coefficients[0];
487
14.8M
      float block01 = coefficients[1];
488
14.8M
      float block10 = coefficients[8];
489
14.8M
      float block11 = coefficients[9];
490
14.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
14.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
14.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
14.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
14.8M
      break;
495
0
    }
496
14.8M
    case Type::DCT8X4: {
497
44.4M
      for (size_t x = 0; x < 2; x++) {
498
29.6M
        HWY_ALIGN float block[4 * 8];
499
29.6M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
29.6M
                                 scratch_space);
501
148M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.06G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
949M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
949M
          }
506
118M
        }
507
29.6M
      }
508
14.8M
      float block0 = coefficients[0];
509
14.8M
      float block1 = coefficients[8];
510
14.8M
      coefficients[0] = (block0 + block1) * 0.5f;
511
14.8M
      coefficients[8] = (block0 - block1) * 0.5f;
512
14.8M
      break;
513
0
    }
514
14.8M
    case Type::DCT4X8: {
515
44.4M
      for (size_t y = 0; y < 2; y++) {
516
29.6M
        HWY_ALIGN float block[4 * 8];
517
29.6M
        ComputeScaledDCT<4, 8>()(
518
29.6M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
29.6M
            scratch_space);
520
148M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.06G
          for (size_t ix = 0; ix < 8; ix++) {
522
949M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
949M
          }
524
118M
        }
525
29.6M
      }
526
14.8M
      float block0 = coefficients[0];
527
14.8M
      float block1 = coefficients[8];
528
14.8M
      coefficients[0] = (block0 + block1) * 0.5f;
529
14.8M
      coefficients[8] = (block0 - block1) * 0.5f;
530
14.8M
      break;
531
0
    }
532
14.8M
    case Type::DCT4X4: {
533
44.4M
      for (size_t y = 0; y < 2; y++) {
534
88.9M
        for (size_t x = 0; x < 2; x++) {
535
59.3M
          HWY_ALIGN float block[4 * 4];
536
59.3M
          ComputeScaledDCT<4, 4>()(
537
59.3M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
59.3M
              block, scratch_space);
539
296M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.18G
            for (size_t ix = 0; ix < 4; ix++) {
541
949M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
949M
            }
543
237M
          }
544
59.3M
        }
545
29.6M
      }
546
14.8M
      float block00 = coefficients[0];
547
14.8M
      float block01 = coefficients[1];
548
14.8M
      float block10 = coefficients[8];
549
14.8M
      float block11 = coefficients[9];
550
14.8M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
14.8M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
14.8M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
14.8M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
14.8M
      break;
555
0
    }
556
14.8M
    case Type::DCT2X2: {
557
14.8M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
14.8M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
14.8M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
14.8M
      break;
561
0
    }
562
6.15M
    case Type::DCT16X16: {
563
6.15M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.15M
                                 scratch_space);
565
6.15M
      break;
566
0
    }
567
12.0M
    case Type::DCT16X8: {
568
12.0M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
12.0M
                                scratch_space);
570
12.0M
      break;
571
0
    }
572
12.0M
    case Type::DCT8X16: {
573
12.0M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
12.0M
                                scratch_space);
575
12.0M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.40M
    case Type::DCT32X16: {
588
2.40M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.40M
                                 scratch_space);
590
2.40M
      break;
591
0
    }
592
2.42M
    case Type::DCT16X32: {
593
2.42M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.42M
                                 scratch_space);
595
2.42M
      break;
596
0
    }
597
1.23M
    case Type::DCT32X32: {
598
1.23M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.23M
                                 scratch_space);
600
1.23M
      break;
601
0
    }
602
14.8M
    case Type::DCT: {
603
14.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
14.8M
                               scratch_space);
605
14.8M
      break;
606
0
    }
607
14.8M
    case Type::AFV0: {
608
14.8M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
14.8M
      break;
610
0
    }
611
14.8M
    case Type::AFV1: {
612
14.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
14.8M
      break;
614
0
    }
615
14.8M
    case Type::AFV2: {
616
14.8M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
14.8M
      break;
618
0
    }
619
14.8M
    case Type::AFV3: {
620
14.8M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
14.8M
      break;
622
0
    }
623
203k
    case Type::DCT64X64: {
624
203k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
203k
                                 scratch_space);
626
203k
      break;
627
0
    }
628
755k
    case Type::DCT64X32: {
629
755k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
755k
                                 scratch_space);
631
755k
      break;
632
0
    }
633
513k
    case Type::DCT32X64: {
634
513k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
513k
                                 scratch_space);
636
513k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
186M
  }
669
186M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
22.3M
                                          float* JXL_RESTRICT scratch_space) {
462
22.3M
  using Type = AcStrategyType;
463
22.3M
  switch (strategy) {
464
1.05M
    case Type::IDENTITY: {
465
3.15M
      for (size_t y = 0; y < 2; y++) {
466
6.31M
        for (size_t x = 0; x < 2; x++) {
467
4.20M
          float block_dc = 0;
468
21.0M
          for (size_t iy = 0; iy < 4; iy++) {
469
84.1M
            for (size_t ix = 0; ix < 4; ix++) {
470
67.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
67.3M
            }
472
16.8M
          }
473
4.20M
          block_dc *= 1.0f / 16;
474
21.0M
          for (size_t iy = 0; iy < 4; iy++) {
475
84.1M
            for (size_t ix = 0; ix < 4; ix++) {
476
67.3M
              if (ix == 1 && iy == 1) continue;
477
63.1M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
63.1M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
63.1M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
63.1M
            }
481
16.8M
          }
482
4.20M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.20M
          coefficients[y * 8 + x] = block_dc;
484
4.20M
        }
485
2.10M
      }
486
1.05M
      float block00 = coefficients[0];
487
1.05M
      float block01 = coefficients[1];
488
1.05M
      float block10 = coefficients[8];
489
1.05M
      float block11 = coefficients[9];
490
1.05M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.05M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.05M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.05M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.05M
      break;
495
0
    }
496
447k
    case Type::DCT8X4: {
497
1.34M
      for (size_t x = 0; x < 2; x++) {
498
894k
        HWY_ALIGN float block[4 * 8];
499
894k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
894k
                                 scratch_space);
501
4.47M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.1M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.6M
          }
506
3.57M
        }
507
894k
      }
508
447k
      float block0 = coefficients[0];
509
447k
      float block1 = coefficients[8];
510
447k
      coefficients[0] = (block0 + block1) * 0.5f;
511
447k
      coefficients[8] = (block0 - block1) * 0.5f;
512
447k
      break;
513
0
    }
514
281k
    case Type::DCT4X8: {
515
844k
      for (size_t y = 0; y < 2; y++) {
516
563k
        HWY_ALIGN float block[4 * 8];
517
563k
        ComputeScaledDCT<4, 8>()(
518
563k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
563k
            scratch_space);
520
2.81M
        for (size_t iy = 0; iy < 4; iy++) {
521
20.2M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.0M
          }
524
2.25M
        }
525
563k
      }
526
281k
      float block0 = coefficients[0];
527
281k
      float block1 = coefficients[8];
528
281k
      coefficients[0] = (block0 + block1) * 0.5f;
529
281k
      coefficients[8] = (block0 - block1) * 0.5f;
530
281k
      break;
531
0
    }
532
2.32k
    case Type::DCT4X4: {
533
6.96k
      for (size_t y = 0; y < 2; y++) {
534
13.9k
        for (size_t x = 0; x < 2; x++) {
535
9.28k
          HWY_ALIGN float block[4 * 4];
536
9.28k
          ComputeScaledDCT<4, 4>()(
537
9.28k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.28k
              block, scratch_space);
539
46.4k
          for (size_t iy = 0; iy < 4; iy++) {
540
185k
            for (size_t ix = 0; ix < 4; ix++) {
541
148k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
148k
            }
543
37.1k
          }
544
9.28k
        }
545
4.64k
      }
546
2.32k
      float block00 = coefficients[0];
547
2.32k
      float block01 = coefficients[1];
548
2.32k
      float block10 = coefficients[8];
549
2.32k
      float block11 = coefficients[9];
550
2.32k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.32k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.32k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.32k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.32k
      break;
555
0
    }
556
1.00M
    case Type::DCT2X2: {
557
1.00M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.00M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.00M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.00M
      break;
561
0
    }
562
178k
    case Type::DCT16X16: {
563
178k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
178k
                                 scratch_space);
565
178k
      break;
566
0
    }
567
297k
    case Type::DCT16X8: {
568
297k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
297k
                                scratch_space);
570
297k
      break;
571
0
    }
572
378k
    case Type::DCT8X16: {
573
378k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
378k
                                scratch_space);
575
378k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
80.1k
    case Type::DCT32X16: {
588
80.1k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
80.1k
                                 scratch_space);
590
80.1k
      break;
591
0
    }
592
123k
    case Type::DCT16X32: {
593
123k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
123k
                                 scratch_space);
595
123k
      break;
596
0
    }
597
130k
    case Type::DCT32X32: {
598
130k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
130k
                                 scratch_space);
600
130k
      break;
601
0
    }
602
17.3M
    case Type::DCT: {
603
17.3M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
17.3M
                               scratch_space);
605
17.3M
      break;
606
0
    }
607
232k
    case Type::AFV0: {
608
232k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
232k
      break;
610
0
    }
611
244k
    case Type::AFV1: {
612
244k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
244k
      break;
614
0
    }
615
220k
    case Type::AFV2: {
616
220k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
220k
      break;
618
0
    }
619
245k
    case Type::AFV3: {
620
245k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
245k
      break;
622
0
    }
623
34.9k
    case Type::DCT64X64: {
624
34.9k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
34.9k
                                 scratch_space);
626
34.9k
      break;
627
0
    }
628
6.05k
    case Type::DCT64X32: {
629
6.05k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.05k
                                 scratch_space);
631
6.05k
      break;
632
0
    }
633
10.9k
    case Type::DCT32X64: {
634
10.9k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.9k
                                 scratch_space);
636
10.9k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
22.3M
  }
669
22.3M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
29.8M
                                              float* scratch_space) {
676
29.8M
  using Type = AcStrategyType;
677
29.8M
  switch (strategy) {
678
595k
    case Type::DCT16X8: {
679
595k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
595k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
595k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
595k
      break;
683
0
    }
684
757k
    case Type::DCT8X16: {
685
757k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
757k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
757k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
757k
      break;
689
0
    }
690
357k
    case Type::DCT16X16: {
691
357k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
357k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
357k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
357k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
160k
    case Type::DCT32X16: {
709
160k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
160k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
160k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
160k
      break;
713
0
    }
714
246k
    case Type::DCT16X32: {
715
246k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
246k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
246k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
246k
      break;
719
0
    }
720
261k
    case Type::DCT32X32: {
721
261k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
261k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
261k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
261k
      break;
725
0
    }
726
12.1k
    case Type::DCT64X32: {
727
12.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.1k
      break;
731
0
    }
732
21.8k
    case Type::DCT32X64: {
733
21.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
21.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
21.8k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
21.8k
      break;
737
0
    }
738
69.9k
    case Type::DCT64X64: {
739
69.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
69.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
69.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
69.9k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
19.8M
    case Type::DCT:
787
21.8M
    case Type::DCT2X2:
788
21.9M
    case Type::DCT4X4:
789
22.4M
    case Type::DCT4X8:
790
23.3M
    case Type::DCT8X4:
791
23.8M
    case Type::AFV0:
792
24.3M
    case Type::AFV1:
793
24.7M
    case Type::AFV2:
794
25.2M
    case Type::AFV3:
795
27.3M
    case Type::IDENTITY:
796
27.3M
      dc[0] = block[0];
797
27.3M
      break;
798
29.8M
  }
799
29.8M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
7.50M
                                              float* scratch_space) {
676
7.50M
  using Type = AcStrategyType;
677
7.50M
  switch (strategy) {
678
297k
    case Type::DCT16X8: {
679
297k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
297k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
297k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
297k
      break;
683
0
    }
684
378k
    case Type::DCT8X16: {
685
378k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
378k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
378k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
378k
      break;
689
0
    }
690
178k
    case Type::DCT16X16: {
691
178k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
178k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
178k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
178k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
80.1k
    case Type::DCT32X16: {
709
80.1k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
80.1k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
80.1k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
80.1k
      break;
713
0
    }
714
123k
    case Type::DCT16X32: {
715
123k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
123k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
123k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
123k
      break;
719
0
    }
720
130k
    case Type::DCT32X32: {
721
130k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
130k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
130k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
130k
      break;
725
0
    }
726
6.05k
    case Type::DCT64X32: {
727
6.05k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.05k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.05k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.05k
      break;
731
0
    }
732
10.9k
    case Type::DCT32X64: {
733
10.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.9k
      break;
737
0
    }
738
34.9k
    case Type::DCT64X64: {
739
34.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
34.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
34.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
34.9k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
2.53M
    case Type::DCT:
787
3.53M
    case Type::DCT2X2:
788
3.53M
    case Type::DCT4X4:
789
3.81M
    case Type::DCT4X8:
790
4.26M
    case Type::DCT8X4:
791
4.49M
    case Type::AFV0:
792
4.74M
    case Type::AFV1:
793
4.96M
    case Type::AFV2:
794
5.20M
    case Type::AFV3:
795
6.25M
    case Type::IDENTITY:
796
6.25M
      dc[0] = block[0];
797
6.25M
      break;
798
7.50M
  }
799
7.50M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
22.3M
                                              float* scratch_space) {
676
22.3M
  using Type = AcStrategyType;
677
22.3M
  switch (strategy) {
678
297k
    case Type::DCT16X8: {
679
297k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
297k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
297k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
297k
      break;
683
0
    }
684
378k
    case Type::DCT8X16: {
685
378k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
378k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
378k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
378k
      break;
689
0
    }
690
178k
    case Type::DCT16X16: {
691
178k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
178k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
178k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
178k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
80.1k
    case Type::DCT32X16: {
709
80.1k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
80.1k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
80.1k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
80.1k
      break;
713
0
    }
714
123k
    case Type::DCT16X32: {
715
123k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
123k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
123k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
123k
      break;
719
0
    }
720
130k
    case Type::DCT32X32: {
721
130k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
130k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
130k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
130k
      break;
725
0
    }
726
6.05k
    case Type::DCT64X32: {
727
6.05k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.05k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.05k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.05k
      break;
731
0
    }
732
10.9k
    case Type::DCT32X64: {
733
10.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.9k
      break;
737
0
    }
738
34.9k
    case Type::DCT64X64: {
739
34.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
34.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
34.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
34.9k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
17.3M
    case Type::DCT:
787
18.3M
    case Type::DCT2X2:
788
18.3M
    case Type::DCT4X4:
789
18.6M
    case Type::DCT4X8:
790
19.0M
    case Type::DCT8X4:
791
19.3M
    case Type::AFV0:
792
19.5M
    case Type::AFV1:
793
19.7M
    case Type::AFV2:
794
20.0M
    case Type::AFV3:
795
21.0M
    case Type::IDENTITY:
796
21.0M
      dc[0] = block[0];
797
21.0M
      break;
798
22.3M
  }
799
22.3M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_