Coverage Report

Created: 2025-08-11 08:01

/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source (jump to first uncovered line)
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
1.84M
                                   const size_t output_stride, float* scratch) {
40
1.84M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
1.84M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
1.84M
  float* block = scratch;
43
1.84M
  if (ROWS < COLS) {
44
1.74M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
3.89M
      for (size_t x = 0; x < LF_COLS; x++) {
46
2.91M
        block[y * COLS + x] = input[y * input_stride + x] *
47
2.91M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
2.91M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
2.91M
      }
50
970k
    }
51
1.06M
  } else {
52
3.14M
    for (size_t y = 0; y < LF_COLS; y++) {
53
9.52M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
7.44M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
7.44M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
7.44M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
7.44M
      }
58
2.07M
    }
59
1.06M
  }
60
61
1.84M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
1.84M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
1.84M
                                  scratch_space);
64
1.84M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
258k
                                   const size_t output_stride, float* scratch) {
40
258k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
258k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
258k
  float* block = scratch;
43
258k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
258k
  } else {
52
516k
    for (size_t y = 0; y < LF_COLS; y++) {
53
774k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
516k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
516k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
516k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
516k
      }
58
258k
    }
59
258k
  }
60
61
258k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
258k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
258k
                                  scratch_space);
64
258k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
303k
                                   const size_t output_stride, float* scratch) {
40
303k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
303k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
303k
  float* block = scratch;
43
303k
  if (ROWS < COLS) {
44
607k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
911k
      for (size_t x = 0; x < LF_COLS; x++) {
46
607k
        block[y * COLS + x] = input[y * input_stride + x] *
47
607k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
607k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
607k
      }
50
303k
    }
51
303k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
303k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
303k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
303k
                                  scratch_space);
64
303k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
160k
                                   const size_t output_stride, float* scratch) {
40
160k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
160k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
160k
  float* block = scratch;
43
160k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
160k
  } else {
52
481k
    for (size_t y = 0; y < LF_COLS; y++) {
53
963k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
642k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
642k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
642k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
642k
      }
58
321k
    }
59
160k
  }
60
61
160k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
160k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
160k
                                  scratch_space);
64
160k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.9k
                                   const size_t output_stride, float* scratch) {
40
44.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.9k
  float* block = scratch;
43
44.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
44.9k
  } else {
52
134k
    for (size_t y = 0; y < LF_COLS; y++) {
53
449k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
359k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
359k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
359k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
359k
      }
58
89.8k
    }
59
44.9k
  }
60
61
44.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.9k
                                  scratch_space);
64
44.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
74.9k
                                   const size_t output_stride, float* scratch) {
40
74.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
74.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
74.9k
  float* block = scratch;
43
74.9k
  if (ROWS < COLS) {
44
224k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
749k
      for (size_t x = 0; x < LF_COLS; x++) {
46
599k
        block[y * COLS + x] = input[y * input_stride + x] *
47
599k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
599k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
599k
      }
50
149k
    }
51
74.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
74.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
74.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
74.9k
                                  scratch_space);
64
74.9k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.5k
                                   const size_t output_stride, float* scratch) {
40
45.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.5k
  float* block = scratch;
43
45.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.5k
  } else {
52
227k
    for (size_t y = 0; y < LF_COLS; y++) {
53
910k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
728k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
728k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
728k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
728k
      }
58
182k
    }
59
45.5k
  }
60
61
45.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.5k
                                  scratch_space);
64
45.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
4.96k
                                   const size_t output_stride, float* scratch) {
40
4.96k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
4.96k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
4.96k
  float* block = scratch;
43
4.96k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
4.96k
  } else {
52
24.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
178k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
158k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
158k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
158k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
158k
      }
58
19.8k
    }
59
4.96k
  }
60
61
4.96k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
4.96k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
4.96k
                                  scratch_space);
64
4.96k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.86k
                                   const size_t output_stride, float* scratch) {
40
7.86k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.86k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.86k
  float* block = scratch;
43
7.86k
  if (ROWS < COLS) {
44
39.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
283k
      for (size_t x = 0; x < LF_COLS; x++) {
46
251k
        block[y * COLS + x] = input[y * input_stride + x] *
47
251k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
251k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
251k
      }
50
31.4k
    }
51
7.86k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.86k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.86k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.86k
                                  scratch_space);
64
7.86k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
20.6k
                                   const size_t output_stride, float* scratch) {
40
20.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
20.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
20.6k
  float* block = scratch;
43
20.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
20.6k
  } else {
52
185k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.48M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.31M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.31M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.31M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.31M
      }
58
164k
    }
59
20.6k
  }
60
61
20.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
20.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
20.6k
                                  scratch_space);
64
20.6k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
258k
                                   const size_t output_stride, float* scratch) {
40
258k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
258k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
258k
  float* block = scratch;
43
258k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
258k
  } else {
52
516k
    for (size_t y = 0; y < LF_COLS; y++) {
53
774k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
516k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
516k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
516k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
516k
      }
58
258k
    }
59
258k
  }
60
61
258k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
258k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
258k
                                  scratch_space);
64
258k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
303k
                                   const size_t output_stride, float* scratch) {
40
303k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
303k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
303k
  float* block = scratch;
43
303k
  if (ROWS < COLS) {
44
607k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
911k
      for (size_t x = 0; x < LF_COLS; x++) {
46
607k
        block[y * COLS + x] = input[y * input_stride + x] *
47
607k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
607k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
607k
      }
50
303k
    }
51
303k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
303k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
303k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
303k
                                  scratch_space);
64
303k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
160k
                                   const size_t output_stride, float* scratch) {
40
160k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
160k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
160k
  float* block = scratch;
43
160k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
160k
  } else {
52
481k
    for (size_t y = 0; y < LF_COLS; y++) {
53
963k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
642k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
642k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
642k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
642k
      }
58
321k
    }
59
160k
  }
60
61
160k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
160k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
160k
                                  scratch_space);
64
160k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
44.9k
                                   const size_t output_stride, float* scratch) {
40
44.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
44.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
44.9k
  float* block = scratch;
43
44.9k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
44.9k
  } else {
52
134k
    for (size_t y = 0; y < LF_COLS; y++) {
53
449k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
359k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
359k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
359k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
359k
      }
58
89.8k
    }
59
44.9k
  }
60
61
44.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
44.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
44.9k
                                  scratch_space);
64
44.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
74.9k
                                   const size_t output_stride, float* scratch) {
40
74.9k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
74.9k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
74.9k
  float* block = scratch;
43
74.9k
  if (ROWS < COLS) {
44
224k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
749k
      for (size_t x = 0; x < LF_COLS; x++) {
46
599k
        block[y * COLS + x] = input[y * input_stride + x] *
47
599k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
599k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
599k
      }
50
149k
    }
51
74.9k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
74.9k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
74.9k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
74.9k
                                  scratch_space);
64
74.9k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
45.5k
                                   const size_t output_stride, float* scratch) {
40
45.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
45.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
45.5k
  float* block = scratch;
43
45.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
45.5k
  } else {
52
227k
    for (size_t y = 0; y < LF_COLS; y++) {
53
910k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
728k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
728k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
728k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
728k
      }
58
182k
    }
59
45.5k
  }
60
61
45.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
45.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
45.5k
                                  scratch_space);
64
45.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
4.96k
                                   const size_t output_stride, float* scratch) {
40
4.96k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
4.96k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
4.96k
  float* block = scratch;
43
4.96k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
4.96k
  } else {
52
24.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
178k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
158k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
158k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
158k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
158k
      }
58
19.8k
    }
59
4.96k
  }
60
61
4.96k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
4.96k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
4.96k
                                  scratch_space);
64
4.96k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.86k
                                   const size_t output_stride, float* scratch) {
40
7.86k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.86k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.86k
  float* block = scratch;
43
7.86k
  if (ROWS < COLS) {
44
39.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
283k
      for (size_t x = 0; x < LF_COLS; x++) {
46
251k
        block[y * COLS + x] = input[y * input_stride + x] *
47
251k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
251k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
251k
      }
50
31.4k
    }
51
7.86k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
7.86k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.86k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.86k
                                  scratch_space);
64
7.86k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
20.6k
                                   const size_t output_stride, float* scratch) {
40
20.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
20.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
20.6k
  float* block = scratch;
43
20.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
20.6k
  } else {
52
185k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.48M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.31M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.31M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.31M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.31M
      }
58
164k
    }
59
20.6k
  }
60
61
20.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
20.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
20.6k
                                  scratch_space);
64
20.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
34.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
34.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
34.2M
  static_assert(S % 2 == 0, "S should be even");
70
34.2M
  float temp[kDCTBlockSize];
71
34.2M
  constexpr size_t num_2x2 = S / 2;
72
114M
  for (size_t y = 0; y < num_2x2; y++) {
73
319M
    for (size_t x = 0; x < num_2x2; x++) {
74
239M
      float c00 = block[y * 2 * stride + x * 2];
75
239M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
239M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
239M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
239M
      float r00 = c00 + c01 + c10 + c11;
79
239M
      float r01 = c00 + c01 - c10 - c11;
80
239M
      float r10 = c00 - c01 + c10 - c11;
81
239M
      float r11 = c00 - c01 - c10 + c11;
82
239M
      r00 *= 0.25f;
83
239M
      r01 *= 0.25f;
84
239M
      r10 *= 0.25f;
85
239M
      r11 *= 0.25f;
86
239M
      temp[y * kBlockDim + x] = r00;
87
239M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
239M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
239M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
239M
    }
91
79.9M
  }
92
194M
  for (size_t y = 0; y < S; y++) {
93
1.11G
    for (size_t x = 0; x < S; x++) {
94
959M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
959M
    }
96
159M
  }
97
34.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
4.49M
  for (size_t y = 0; y < num_2x2; y++) {
73
17.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.3M
      float c00 = block[y * 2 * stride + x * 2];
75
14.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
14.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
14.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
14.3M
      float r00 = c00 + c01 + c10 + c11;
79
14.3M
      float r01 = c00 + c01 - c10 - c11;
80
14.3M
      float r10 = c00 - c01 + c10 - c11;
81
14.3M
      float r11 = c00 - c01 - c10 + c11;
82
14.3M
      r00 *= 0.25f;
83
14.3M
      r01 *= 0.25f;
84
14.3M
      r10 *= 0.25f;
85
14.3M
      r11 *= 0.25f;
86
14.3M
      temp[y * kBlockDim + x] = r00;
87
14.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
14.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
14.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
14.3M
    }
91
3.59M
  }
92
8.09M
  for (size_t y = 0; y < S; y++) {
93
64.7M
    for (size_t x = 0; x < S; x++) {
94
57.5M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
57.5M
    }
96
7.19M
  }
97
899k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
2.69M
  for (size_t y = 0; y < num_2x2; y++) {
73
5.39M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.59M
      float c00 = block[y * 2 * stride + x * 2];
75
3.59M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.59M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.59M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.59M
      float r00 = c00 + c01 + c10 + c11;
79
3.59M
      float r01 = c00 + c01 - c10 - c11;
80
3.59M
      float r10 = c00 - c01 + c10 - c11;
81
3.59M
      float r11 = c00 - c01 - c10 + c11;
82
3.59M
      r00 *= 0.25f;
83
3.59M
      r01 *= 0.25f;
84
3.59M
      r10 *= 0.25f;
85
3.59M
      r11 *= 0.25f;
86
3.59M
      temp[y * kBlockDim + x] = r00;
87
3.59M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.59M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.59M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.59M
    }
91
1.79M
  }
92
4.49M
  for (size_t y = 0; y < S; y++) {
93
17.9M
    for (size_t x = 0; x < S; x++) {
94
14.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
14.3M
    }
96
3.59M
  }
97
899k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
1.79M
  for (size_t y = 0; y < num_2x2; y++) {
73
1.79M
    for (size_t x = 0; x < num_2x2; x++) {
74
899k
      float c00 = block[y * 2 * stride + x * 2];
75
899k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
899k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
899k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
899k
      float r00 = c00 + c01 + c10 + c11;
79
899k
      float r01 = c00 + c01 - c10 - c11;
80
899k
      float r10 = c00 - c01 + c10 - c11;
81
899k
      float r11 = c00 - c01 - c10 + c11;
82
899k
      r00 *= 0.25f;
83
899k
      r01 *= 0.25f;
84
899k
      r10 *= 0.25f;
85
899k
      r11 *= 0.25f;
86
899k
      temp[y * kBlockDim + x] = r00;
87
899k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
899k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
899k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
899k
    }
91
899k
  }
92
2.69M
  for (size_t y = 0; y < S; y++) {
93
5.39M
    for (size_t x = 0; x < S; x++) {
94
3.59M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.59M
    }
96
1.79M
  }
97
899k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
9.62M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
9.62M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
9.62M
  static_assert(S % 2 == 0, "S should be even");
70
9.62M
  float temp[kDCTBlockSize];
71
9.62M
  constexpr size_t num_2x2 = S / 2;
72
48.1M
  for (size_t y = 0; y < num_2x2; y++) {
73
192M
    for (size_t x = 0; x < num_2x2; x++) {
74
153M
      float c00 = block[y * 2 * stride + x * 2];
75
153M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
153M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
153M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
153M
      float r00 = c00 + c01 + c10 + c11;
79
153M
      float r01 = c00 + c01 - c10 - c11;
80
153M
      float r10 = c00 - c01 + c10 - c11;
81
153M
      float r11 = c00 - c01 - c10 + c11;
82
153M
      r00 *= 0.25f;
83
153M
      r01 *= 0.25f;
84
153M
      r10 *= 0.25f;
85
153M
      r11 *= 0.25f;
86
153M
      temp[y * kBlockDim + x] = r00;
87
153M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
153M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
153M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
153M
    }
91
38.4M
  }
92
86.5M
  for (size_t y = 0; y < S; y++) {
93
692M
    for (size_t x = 0; x < S; x++) {
94
615M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
615M
    }
96
76.9M
  }
97
9.62M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
9.62M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
9.62M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
9.62M
  static_assert(S % 2 == 0, "S should be even");
70
9.62M
  float temp[kDCTBlockSize];
71
9.62M
  constexpr size_t num_2x2 = S / 2;
72
28.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
57.7M
    for (size_t x = 0; x < num_2x2; x++) {
74
38.4M
      float c00 = block[y * 2 * stride + x * 2];
75
38.4M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
38.4M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
38.4M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
38.4M
      float r00 = c00 + c01 + c10 + c11;
79
38.4M
      float r01 = c00 + c01 - c10 - c11;
80
38.4M
      float r10 = c00 - c01 + c10 - c11;
81
38.4M
      float r11 = c00 - c01 - c10 + c11;
82
38.4M
      r00 *= 0.25f;
83
38.4M
      r01 *= 0.25f;
84
38.4M
      r10 *= 0.25f;
85
38.4M
      r11 *= 0.25f;
86
38.4M
      temp[y * kBlockDim + x] = r00;
87
38.4M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
38.4M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
38.4M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
38.4M
    }
91
19.2M
  }
92
48.1M
  for (size_t y = 0; y < S; y++) {
93
192M
    for (size_t x = 0; x < S; x++) {
94
153M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
153M
    }
96
38.4M
  }
97
9.62M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
9.62M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
9.62M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
9.62M
  static_assert(S % 2 == 0, "S should be even");
70
9.62M
  float temp[kDCTBlockSize];
71
9.62M
  constexpr size_t num_2x2 = S / 2;
72
19.2M
  for (size_t y = 0; y < num_2x2; y++) {
73
19.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
9.62M
      float c00 = block[y * 2 * stride + x * 2];
75
9.62M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
9.62M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
9.62M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
9.62M
      float r00 = c00 + c01 + c10 + c11;
79
9.62M
      float r01 = c00 + c01 - c10 - c11;
80
9.62M
      float r10 = c00 - c01 + c10 - c11;
81
9.62M
      float r11 = c00 - c01 - c10 + c11;
82
9.62M
      r00 *= 0.25f;
83
9.62M
      r01 *= 0.25f;
84
9.62M
      r10 *= 0.25f;
85
9.62M
      r11 *= 0.25f;
86
9.62M
      temp[y * kBlockDim + x] = r00;
87
9.62M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
9.62M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
9.62M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
9.62M
    }
91
9.62M
  }
92
28.8M
  for (size_t y = 0; y < S; y++) {
93
57.7M
    for (size_t x = 0; x < S; x++) {
94
38.4M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
38.4M
    }
96
19.2M
  }
97
9.62M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
4.49M
  for (size_t y = 0; y < num_2x2; y++) {
73
17.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
14.3M
      float c00 = block[y * 2 * stride + x * 2];
75
14.3M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
14.3M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
14.3M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
14.3M
      float r00 = c00 + c01 + c10 + c11;
79
14.3M
      float r01 = c00 + c01 - c10 - c11;
80
14.3M
      float r10 = c00 - c01 + c10 - c11;
81
14.3M
      float r11 = c00 - c01 - c10 + c11;
82
14.3M
      r00 *= 0.25f;
83
14.3M
      r01 *= 0.25f;
84
14.3M
      r10 *= 0.25f;
85
14.3M
      r11 *= 0.25f;
86
14.3M
      temp[y * kBlockDim + x] = r00;
87
14.3M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
14.3M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
14.3M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
14.3M
    }
91
3.59M
  }
92
8.09M
  for (size_t y = 0; y < S; y++) {
93
64.7M
    for (size_t x = 0; x < S; x++) {
94
57.5M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
57.5M
    }
96
7.19M
  }
97
899k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
2.69M
  for (size_t y = 0; y < num_2x2; y++) {
73
5.39M
    for (size_t x = 0; x < num_2x2; x++) {
74
3.59M
      float c00 = block[y * 2 * stride + x * 2];
75
3.59M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
3.59M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
3.59M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
3.59M
      float r00 = c00 + c01 + c10 + c11;
79
3.59M
      float r01 = c00 + c01 - c10 - c11;
80
3.59M
      float r10 = c00 - c01 + c10 - c11;
81
3.59M
      float r11 = c00 - c01 - c10 + c11;
82
3.59M
      r00 *= 0.25f;
83
3.59M
      r01 *= 0.25f;
84
3.59M
      r10 *= 0.25f;
85
3.59M
      r11 *= 0.25f;
86
3.59M
      temp[y * kBlockDim + x] = r00;
87
3.59M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
3.59M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
3.59M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
3.59M
    }
91
1.79M
  }
92
4.49M
  for (size_t y = 0; y < S; y++) {
93
17.9M
    for (size_t x = 0; x < S; x++) {
94
14.3M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
14.3M
    }
96
3.59M
  }
97
899k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
899k
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
899k
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
899k
  static_assert(S % 2 == 0, "S should be even");
70
899k
  float temp[kDCTBlockSize];
71
899k
  constexpr size_t num_2x2 = S / 2;
72
1.79M
  for (size_t y = 0; y < num_2x2; y++) {
73
1.79M
    for (size_t x = 0; x < num_2x2; x++) {
74
899k
      float c00 = block[y * 2 * stride + x * 2];
75
899k
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
899k
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
899k
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
899k
      float r00 = c00 + c01 + c10 + c11;
79
899k
      float r01 = c00 + c01 - c10 - c11;
80
899k
      float r10 = c00 - c01 + c10 - c11;
81
899k
      float r11 = c00 - c01 - c10 + c11;
82
899k
      r00 *= 0.25f;
83
899k
      r01 *= 0.25f;
84
899k
      r10 *= 0.25f;
85
899k
      r11 *= 0.25f;
86
899k
      temp[y * kBlockDim + x] = r00;
87
899k
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
899k
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
899k
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
899k
    }
91
899k
  }
92
2.69M
  for (size_t y = 0; y < S; y++) {
93
5.39M
    for (size_t x = 0; x < S; x++) {
94
3.59M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
3.59M
    }
96
1.79M
  }
97
899k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
40.0M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
40.0M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
40.0M
      {
102
40.0M
          0.2500000000000000,
103
40.0M
          0.8769029297991420f,
104
40.0M
          0.0000000000000000,
105
40.0M
          0.0000000000000000,
106
40.0M
          0.0000000000000000,
107
40.0M
          -0.4105377591765233f,
108
40.0M
          0.0000000000000000,
109
40.0M
          0.0000000000000000,
110
40.0M
          0.0000000000000000,
111
40.0M
          0.0000000000000000,
112
40.0M
          0.0000000000000000,
113
40.0M
          0.0000000000000000,
114
40.0M
          0.0000000000000000,
115
40.0M
          0.0000000000000000,
116
40.0M
          0.0000000000000000,
117
40.0M
          0.0000000000000000,
118
40.0M
      },
119
40.0M
      {
120
40.0M
          0.2500000000000000,
121
40.0M
          0.2206518106944235f,
122
40.0M
          0.0000000000000000,
123
40.0M
          0.0000000000000000,
124
40.0M
          -0.7071067811865474f,
125
40.0M
          0.6235485373547691f,
126
40.0M
          0.0000000000000000,
127
40.0M
          0.0000000000000000,
128
40.0M
          0.0000000000000000,
129
40.0M
          0.0000000000000000,
130
40.0M
          0.0000000000000000,
131
40.0M
          0.0000000000000000,
132
40.0M
          0.0000000000000000,
133
40.0M
          0.0000000000000000,
134
40.0M
          0.0000000000000000,
135
40.0M
          0.0000000000000000,
136
40.0M
      },
137
40.0M
      {
138
40.0M
          0.2500000000000000,
139
40.0M
          -0.1014005039375376f,
140
40.0M
          0.4067007583026075f,
141
40.0M
          -0.2125574805828875f,
142
40.0M
          0.0000000000000000,
143
40.0M
          -0.0643507165794627f,
144
40.0M
          -0.4517556589999482f,
145
40.0M
          -0.3046847507248690f,
146
40.0M
          0.3017929516615495f,
147
40.0M
          0.4082482904638627f,
148
40.0M
          0.1747866975480809f,
149
40.0M
          -0.2110560104933578f,
150
40.0M
          -0.1426608480880726f,
151
40.0M
          -0.1381354035075859f,
152
40.0M
          -0.1743760259965107f,
153
40.0M
          0.1135498731499434f,
154
40.0M
      },
155
40.0M
      {
156
40.0M
          0.2500000000000000,
157
40.0M
          -0.1014005039375375f,
158
40.0M
          0.4444481661973445f,
159
40.0M
          0.3085497062849767f,
160
40.0M
          0.0000000000000000f,
161
40.0M
          -0.0643507165794627f,
162
40.0M
          0.1585450355184006f,
163
40.0M
          0.5112616136591823f,
164
40.0M
          0.2579236279634118f,
165
40.0M
          0.0000000000000000,
166
40.0M
          0.0812611176717539f,
167
40.0M
          0.1856718091610980f,
168
40.0M
          -0.3416446842253372f,
169
40.0M
          0.3302282550303788f,
170
40.0M
          0.0702790691196284f,
171
40.0M
          -0.0741750459581035f,
172
40.0M
      },
173
40.0M
      {
174
40.0M
          0.2500000000000000,
175
40.0M
          0.2206518106944236f,
176
40.0M
          0.0000000000000000,
177
40.0M
          0.0000000000000000,
178
40.0M
          0.7071067811865476f,
179
40.0M
          0.6235485373547694f,
180
40.0M
          0.0000000000000000,
181
40.0M
          0.0000000000000000,
182
40.0M
          0.0000000000000000,
183
40.0M
          0.0000000000000000,
184
40.0M
          0.0000000000000000,
185
40.0M
          0.0000000000000000,
186
40.0M
          0.0000000000000000,
187
40.0M
          0.0000000000000000,
188
40.0M
          0.0000000000000000,
189
40.0M
          0.0000000000000000,
190
40.0M
      },
191
40.0M
      {
192
40.0M
          0.2500000000000000,
193
40.0M
          -0.1014005039375378f,
194
40.0M
          0.0000000000000000,
195
40.0M
          0.4706702258572536f,
196
40.0M
          0.0000000000000000,
197
40.0M
          -0.0643507165794628f,
198
40.0M
          -0.0403851516082220f,
199
40.0M
          0.0000000000000000,
200
40.0M
          0.1627234014286620f,
201
40.0M
          0.0000000000000000,
202
40.0M
          0.0000000000000000,
203
40.0M
          0.0000000000000000,
204
40.0M
          0.7367497537172237f,
205
40.0M
          0.0875511500058708f,
206
40.0M
          -0.2921026642334881f,
207
40.0M
          0.1940289303259434f,
208
40.0M
      },
209
40.0M
      {
210
40.0M
          0.2500000000000000,
211
40.0M
          -0.1014005039375377f,
212
40.0M
          0.1957439937204294f,
213
40.0M
          -0.1621205195722993f,
214
40.0M
          0.0000000000000000,
215
40.0M
          -0.0643507165794628f,
216
40.0M
          0.0074182263792424f,
217
40.0M
          -0.2904801297289980f,
218
40.0M
          0.0952002265347504f,
219
40.0M
          0.0000000000000000,
220
40.0M
          -0.3675398009862027f,
221
40.0M
          0.4921585901373873f,
222
40.0M
          0.2462710772207515f,
223
40.0M
          -0.0794670660590957f,
224
40.0M
          0.3623817333531167f,
225
40.0M
          -0.4351904965232280f,
226
40.0M
      },
227
40.0M
      {
228
40.0M
          0.2500000000000000,
229
40.0M
          -0.1014005039375376f,
230
40.0M
          0.2929100136981264f,
231
40.0M
          0.0000000000000000,
232
40.0M
          0.0000000000000000,
233
40.0M
          -0.0643507165794627f,
234
40.0M
          0.3935103426921017f,
235
40.0M
          -0.0657870154914280f,
236
40.0M
          0.0000000000000000,
237
40.0M
          -0.4082482904638628f,
238
40.0M
          -0.3078822139579090f,
239
40.0M
          -0.3852501370925192f,
240
40.0M
          -0.0857401903551931f,
241
40.0M
          -0.4613374887461511f,
242
40.0M
          0.0000000000000000,
243
40.0M
          0.2191868483885747f,
244
40.0M
      },
245
40.0M
      {
246
40.0M
          0.2500000000000000,
247
40.0M
          -0.1014005039375376f,
248
40.0M
          -0.4067007583026072f,
249
40.0M
          -0.2125574805828705f,
250
40.0M
          0.0000000000000000,
251
40.0M
          -0.0643507165794627f,
252
40.0M
          -0.4517556589999464f,
253
40.0M
          0.3046847507248840f,
254
40.0M
          0.3017929516615503f,
255
40.0M
          -0.4082482904638635f,
256
40.0M
          -0.1747866975480813f,
257
40.0M
          0.2110560104933581f,
258
40.0M
          -0.1426608480880734f,
259
40.0M
          -0.1381354035075829f,
260
40.0M
          -0.1743760259965108f,
261
40.0M
          0.1135498731499426f,
262
40.0M
      },
263
40.0M
      {
264
40.0M
          0.2500000000000000,
265
40.0M
          -0.1014005039375377f,
266
40.0M
          -0.1957439937204287f,
267
40.0M
          -0.1621205195722833f,
268
40.0M
          0.0000000000000000,
269
40.0M
          -0.0643507165794628f,
270
40.0M
          0.0074182263792444f,
271
40.0M
          0.2904801297290076f,
272
40.0M
          0.0952002265347505f,
273
40.0M
          0.0000000000000000,
274
40.0M
          0.3675398009862011f,
275
40.0M
          -0.4921585901373891f,
276
40.0M
          0.2462710772207514f,
277
40.0M
          -0.0794670660591026f,
278
40.0M
          0.3623817333531165f,
279
40.0M
          -0.4351904965232251f,
280
40.0M
      },
281
40.0M
      {
282
40.0M
          0.2500000000000000,
283
40.0M
          -0.1014005039375375f,
284
40.0M
          0.0000000000000000,
285
40.0M
          -0.4706702258572528f,
286
40.0M
          0.0000000000000000,
287
40.0M
          -0.0643507165794627f,
288
40.0M
          0.1107416575309343f,
289
40.0M
          0.0000000000000000,
290
40.0M
          -0.1627234014286617f,
291
40.0M
          0.0000000000000000,
292
40.0M
          0.0000000000000000,
293
40.0M
          0.0000000000000000,
294
40.0M
          0.1488339922711357f,
295
40.0M
          0.4972464710953509f,
296
40.0M
          0.2921026642334879f,
297
40.0M
          0.5550443808910661f,
298
40.0M
      },
299
40.0M
      {
300
40.0M
          0.2500000000000000,
301
40.0M
          -0.1014005039375377f,
302
40.0M
          0.1137907446044809f,
303
40.0M
          -0.1464291867126764f,
304
40.0M
          0.0000000000000000,
305
40.0M
          -0.0643507165794628f,
306
40.0M
          0.0829816309488205f,
307
40.0M
          -0.2388977352334460f,
308
40.0M
          -0.3531238544981630f,
309
40.0M
          -0.4082482904638630f,
310
40.0M
          0.4826689115059883f,
311
40.0M
          0.1741941265991622f,
312
40.0M
          -0.0476868035022925f,
313
40.0M
          0.1253805944856366f,
314
40.0M
          -0.4326608024727445f,
315
40.0M
          -0.2546827712406646f,
316
40.0M
      },
317
40.0M
      {
318
40.0M
          0.2500000000000000,
319
40.0M
          -0.1014005039375377f,
320
40.0M
          -0.4444481661973438f,
321
40.0M
          0.3085497062849487f,
322
40.0M
          0.0000000000000000,
323
40.0M
          -0.0643507165794628f,
324
40.0M
          0.1585450355183970f,
325
40.0M
          -0.5112616136592012f,
326
40.0M
          0.2579236279634129f,
327
40.0M
          0.0000000000000000,
328
40.0M
          -0.0812611176717504f,
329
40.0M
          -0.1856718091610990f,
330
40.0M
          -0.3416446842253373f,
331
40.0M
          0.3302282550303805f,
332
40.0M
          0.0702790691196282f,
333
40.0M
          -0.0741750459581023f,
334
40.0M
      },
335
40.0M
      {
336
40.0M
          0.2500000000000000,
337
40.0M
          -0.1014005039375376f,
338
40.0M
          -0.2929100136981264f,
339
40.0M
          0.0000000000000000,
340
40.0M
          0.0000000000000000,
341
40.0M
          -0.0643507165794627f,
342
40.0M
          0.3935103426921022f,
343
40.0M
          0.0657870154914254f,
344
40.0M
          0.0000000000000000,
345
40.0M
          0.4082482904638634f,
346
40.0M
          0.3078822139579031f,
347
40.0M
          0.3852501370925211f,
348
40.0M
          -0.0857401903551927f,
349
40.0M
          -0.4613374887461554f,
350
40.0M
          0.0000000000000000,
351
40.0M
          0.2191868483885728f,
352
40.0M
      },
353
40.0M
      {
354
40.0M
          0.2500000000000000,
355
40.0M
          -0.1014005039375376f,
356
40.0M
          -0.1137907446044814f,
357
40.0M
          -0.1464291867126654f,
358
40.0M
          0.0000000000000000,
359
40.0M
          -0.0643507165794627f,
360
40.0M
          0.0829816309488214f,
361
40.0M
          0.2388977352334547f,
362
40.0M
          -0.3531238544981624f,
363
40.0M
          0.4082482904638630f,
364
40.0M
          -0.4826689115059858f,
365
40.0M
          -0.1741941265991621f,
366
40.0M
          -0.0476868035022928f,
367
40.0M
          0.1253805944856431f,
368
40.0M
          -0.4326608024727457f,
369
40.0M
          -0.2546827712406641f,
370
40.0M
      },
371
40.0M
      {
372
40.0M
          0.2500000000000000,
373
40.0M
          -0.1014005039375374f,
374
40.0M
          0.0000000000000000,
375
40.0M
          0.4251149611657548f,
376
40.0M
          0.0000000000000000,
377
40.0M
          -0.0643507165794626f,
378
40.0M
          -0.4517556589999480f,
379
40.0M
          0.0000000000000000,
380
40.0M
          -0.6035859033230976f,
381
40.0M
          0.0000000000000000,
382
40.0M
          0.0000000000000000,
383
40.0M
          0.0000000000000000,
384
40.0M
          -0.1426608480880724f,
385
40.0M
          -0.1381354035075845f,
386
40.0M
          0.3487520519930227f,
387
40.0M
          0.1135498731499429f,
388
40.0M
      },
389
40.0M
  };
390
391
40.0M
  const HWY_CAPPED(float, 16) d;
392
120M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
80.0M
    auto scalar = Zero(d);
394
1.36G
    for (size_t j = 0; j < 16; j++) {
395
1.28G
      auto px = Set(d, pixels[j]);
396
1.28G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.28G
      scalar = MulAdd(px, basis, scalar);
398
1.28G
    }
399
80.0M
    Store(scalar, d, coeffs + i);
400
80.0M
  }
401
40.0M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
763k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
763k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
763k
      {
102
763k
          0.2500000000000000,
103
763k
          0.8769029297991420f,
104
763k
          0.0000000000000000,
105
763k
          0.0000000000000000,
106
763k
          0.0000000000000000,
107
763k
          -0.4105377591765233f,
108
763k
          0.0000000000000000,
109
763k
          0.0000000000000000,
110
763k
          0.0000000000000000,
111
763k
          0.0000000000000000,
112
763k
          0.0000000000000000,
113
763k
          0.0000000000000000,
114
763k
          0.0000000000000000,
115
763k
          0.0000000000000000,
116
763k
          0.0000000000000000,
117
763k
          0.0000000000000000,
118
763k
      },
119
763k
      {
120
763k
          0.2500000000000000,
121
763k
          0.2206518106944235f,
122
763k
          0.0000000000000000,
123
763k
          0.0000000000000000,
124
763k
          -0.7071067811865474f,
125
763k
          0.6235485373547691f,
126
763k
          0.0000000000000000,
127
763k
          0.0000000000000000,
128
763k
          0.0000000000000000,
129
763k
          0.0000000000000000,
130
763k
          0.0000000000000000,
131
763k
          0.0000000000000000,
132
763k
          0.0000000000000000,
133
763k
          0.0000000000000000,
134
763k
          0.0000000000000000,
135
763k
          0.0000000000000000,
136
763k
      },
137
763k
      {
138
763k
          0.2500000000000000,
139
763k
          -0.1014005039375376f,
140
763k
          0.4067007583026075f,
141
763k
          -0.2125574805828875f,
142
763k
          0.0000000000000000,
143
763k
          -0.0643507165794627f,
144
763k
          -0.4517556589999482f,
145
763k
          -0.3046847507248690f,
146
763k
          0.3017929516615495f,
147
763k
          0.4082482904638627f,
148
763k
          0.1747866975480809f,
149
763k
          -0.2110560104933578f,
150
763k
          -0.1426608480880726f,
151
763k
          -0.1381354035075859f,
152
763k
          -0.1743760259965107f,
153
763k
          0.1135498731499434f,
154
763k
      },
155
763k
      {
156
763k
          0.2500000000000000,
157
763k
          -0.1014005039375375f,
158
763k
          0.4444481661973445f,
159
763k
          0.3085497062849767f,
160
763k
          0.0000000000000000f,
161
763k
          -0.0643507165794627f,
162
763k
          0.1585450355184006f,
163
763k
          0.5112616136591823f,
164
763k
          0.2579236279634118f,
165
763k
          0.0000000000000000,
166
763k
          0.0812611176717539f,
167
763k
          0.1856718091610980f,
168
763k
          -0.3416446842253372f,
169
763k
          0.3302282550303788f,
170
763k
          0.0702790691196284f,
171
763k
          -0.0741750459581035f,
172
763k
      },
173
763k
      {
174
763k
          0.2500000000000000,
175
763k
          0.2206518106944236f,
176
763k
          0.0000000000000000,
177
763k
          0.0000000000000000,
178
763k
          0.7071067811865476f,
179
763k
          0.6235485373547694f,
180
763k
          0.0000000000000000,
181
763k
          0.0000000000000000,
182
763k
          0.0000000000000000,
183
763k
          0.0000000000000000,
184
763k
          0.0000000000000000,
185
763k
          0.0000000000000000,
186
763k
          0.0000000000000000,
187
763k
          0.0000000000000000,
188
763k
          0.0000000000000000,
189
763k
          0.0000000000000000,
190
763k
      },
191
763k
      {
192
763k
          0.2500000000000000,
193
763k
          -0.1014005039375378f,
194
763k
          0.0000000000000000,
195
763k
          0.4706702258572536f,
196
763k
          0.0000000000000000,
197
763k
          -0.0643507165794628f,
198
763k
          -0.0403851516082220f,
199
763k
          0.0000000000000000,
200
763k
          0.1627234014286620f,
201
763k
          0.0000000000000000,
202
763k
          0.0000000000000000,
203
763k
          0.0000000000000000,
204
763k
          0.7367497537172237f,
205
763k
          0.0875511500058708f,
206
763k
          -0.2921026642334881f,
207
763k
          0.1940289303259434f,
208
763k
      },
209
763k
      {
210
763k
          0.2500000000000000,
211
763k
          -0.1014005039375377f,
212
763k
          0.1957439937204294f,
213
763k
          -0.1621205195722993f,
214
763k
          0.0000000000000000,
215
763k
          -0.0643507165794628f,
216
763k
          0.0074182263792424f,
217
763k
          -0.2904801297289980f,
218
763k
          0.0952002265347504f,
219
763k
          0.0000000000000000,
220
763k
          -0.3675398009862027f,
221
763k
          0.4921585901373873f,
222
763k
          0.2462710772207515f,
223
763k
          -0.0794670660590957f,
224
763k
          0.3623817333531167f,
225
763k
          -0.4351904965232280f,
226
763k
      },
227
763k
      {
228
763k
          0.2500000000000000,
229
763k
          -0.1014005039375376f,
230
763k
          0.2929100136981264f,
231
763k
          0.0000000000000000,
232
763k
          0.0000000000000000,
233
763k
          -0.0643507165794627f,
234
763k
          0.3935103426921017f,
235
763k
          -0.0657870154914280f,
236
763k
          0.0000000000000000,
237
763k
          -0.4082482904638628f,
238
763k
          -0.3078822139579090f,
239
763k
          -0.3852501370925192f,
240
763k
          -0.0857401903551931f,
241
763k
          -0.4613374887461511f,
242
763k
          0.0000000000000000,
243
763k
          0.2191868483885747f,
244
763k
      },
245
763k
      {
246
763k
          0.2500000000000000,
247
763k
          -0.1014005039375376f,
248
763k
          -0.4067007583026072f,
249
763k
          -0.2125574805828705f,
250
763k
          0.0000000000000000,
251
763k
          -0.0643507165794627f,
252
763k
          -0.4517556589999464f,
253
763k
          0.3046847507248840f,
254
763k
          0.3017929516615503f,
255
763k
          -0.4082482904638635f,
256
763k
          -0.1747866975480813f,
257
763k
          0.2110560104933581f,
258
763k
          -0.1426608480880734f,
259
763k
          -0.1381354035075829f,
260
763k
          -0.1743760259965108f,
261
763k
          0.1135498731499426f,
262
763k
      },
263
763k
      {
264
763k
          0.2500000000000000,
265
763k
          -0.1014005039375377f,
266
763k
          -0.1957439937204287f,
267
763k
          -0.1621205195722833f,
268
763k
          0.0000000000000000,
269
763k
          -0.0643507165794628f,
270
763k
          0.0074182263792444f,
271
763k
          0.2904801297290076f,
272
763k
          0.0952002265347505f,
273
763k
          0.0000000000000000,
274
763k
          0.3675398009862011f,
275
763k
          -0.4921585901373891f,
276
763k
          0.2462710772207514f,
277
763k
          -0.0794670660591026f,
278
763k
          0.3623817333531165f,
279
763k
          -0.4351904965232251f,
280
763k
      },
281
763k
      {
282
763k
          0.2500000000000000,
283
763k
          -0.1014005039375375f,
284
763k
          0.0000000000000000,
285
763k
          -0.4706702258572528f,
286
763k
          0.0000000000000000,
287
763k
          -0.0643507165794627f,
288
763k
          0.1107416575309343f,
289
763k
          0.0000000000000000,
290
763k
          -0.1627234014286617f,
291
763k
          0.0000000000000000,
292
763k
          0.0000000000000000,
293
763k
          0.0000000000000000,
294
763k
          0.1488339922711357f,
295
763k
          0.4972464710953509f,
296
763k
          0.2921026642334879f,
297
763k
          0.5550443808910661f,
298
763k
      },
299
763k
      {
300
763k
          0.2500000000000000,
301
763k
          -0.1014005039375377f,
302
763k
          0.1137907446044809f,
303
763k
          -0.1464291867126764f,
304
763k
          0.0000000000000000,
305
763k
          -0.0643507165794628f,
306
763k
          0.0829816309488205f,
307
763k
          -0.2388977352334460f,
308
763k
          -0.3531238544981630f,
309
763k
          -0.4082482904638630f,
310
763k
          0.4826689115059883f,
311
763k
          0.1741941265991622f,
312
763k
          -0.0476868035022925f,
313
763k
          0.1253805944856366f,
314
763k
          -0.4326608024727445f,
315
763k
          -0.2546827712406646f,
316
763k
      },
317
763k
      {
318
763k
          0.2500000000000000,
319
763k
          -0.1014005039375377f,
320
763k
          -0.4444481661973438f,
321
763k
          0.3085497062849487f,
322
763k
          0.0000000000000000,
323
763k
          -0.0643507165794628f,
324
763k
          0.1585450355183970f,
325
763k
          -0.5112616136592012f,
326
763k
          0.2579236279634129f,
327
763k
          0.0000000000000000,
328
763k
          -0.0812611176717504f,
329
763k
          -0.1856718091610990f,
330
763k
          -0.3416446842253373f,
331
763k
          0.3302282550303805f,
332
763k
          0.0702790691196282f,
333
763k
          -0.0741750459581023f,
334
763k
      },
335
763k
      {
336
763k
          0.2500000000000000,
337
763k
          -0.1014005039375376f,
338
763k
          -0.2929100136981264f,
339
763k
          0.0000000000000000,
340
763k
          0.0000000000000000,
341
763k
          -0.0643507165794627f,
342
763k
          0.3935103426921022f,
343
763k
          0.0657870154914254f,
344
763k
          0.0000000000000000,
345
763k
          0.4082482904638634f,
346
763k
          0.3078822139579031f,
347
763k
          0.3852501370925211f,
348
763k
          -0.0857401903551927f,
349
763k
          -0.4613374887461554f,
350
763k
          0.0000000000000000,
351
763k
          0.2191868483885728f,
352
763k
      },
353
763k
      {
354
763k
          0.2500000000000000,
355
763k
          -0.1014005039375376f,
356
763k
          -0.1137907446044814f,
357
763k
          -0.1464291867126654f,
358
763k
          0.0000000000000000,
359
763k
          -0.0643507165794627f,
360
763k
          0.0829816309488214f,
361
763k
          0.2388977352334547f,
362
763k
          -0.3531238544981624f,
363
763k
          0.4082482904638630f,
364
763k
          -0.4826689115059858f,
365
763k
          -0.1741941265991621f,
366
763k
          -0.0476868035022928f,
367
763k
          0.1253805944856431f,
368
763k
          -0.4326608024727457f,
369
763k
          -0.2546827712406641f,
370
763k
      },
371
763k
      {
372
763k
          0.2500000000000000,
373
763k
          -0.1014005039375374f,
374
763k
          0.0000000000000000,
375
763k
          0.4251149611657548f,
376
763k
          0.0000000000000000,
377
763k
          -0.0643507165794626f,
378
763k
          -0.4517556589999480f,
379
763k
          0.0000000000000000,
380
763k
          -0.6035859033230976f,
381
763k
          0.0000000000000000,
382
763k
          0.0000000000000000,
383
763k
          0.0000000000000000,
384
763k
          -0.1426608480880724f,
385
763k
          -0.1381354035075845f,
386
763k
          0.3487520519930227f,
387
763k
          0.1135498731499429f,
388
763k
      },
389
763k
  };
390
391
763k
  const HWY_CAPPED(float, 16) d;
392
2.29M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.52M
    auto scalar = Zero(d);
394
25.9M
    for (size_t j = 0; j < 16; j++) {
395
24.4M
      auto px = Set(d, pixels[j]);
396
24.4M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
24.4M
      scalar = MulAdd(px, basis, scalar);
398
24.4M
    }
399
1.52M
    Store(scalar, d, coeffs + i);
400
1.52M
  }
401
763k
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
38.4M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
38.4M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
38.4M
      {
102
38.4M
          0.2500000000000000,
103
38.4M
          0.8769029297991420f,
104
38.4M
          0.0000000000000000,
105
38.4M
          0.0000000000000000,
106
38.4M
          0.0000000000000000,
107
38.4M
          -0.4105377591765233f,
108
38.4M
          0.0000000000000000,
109
38.4M
          0.0000000000000000,
110
38.4M
          0.0000000000000000,
111
38.4M
          0.0000000000000000,
112
38.4M
          0.0000000000000000,
113
38.4M
          0.0000000000000000,
114
38.4M
          0.0000000000000000,
115
38.4M
          0.0000000000000000,
116
38.4M
          0.0000000000000000,
117
38.4M
          0.0000000000000000,
118
38.4M
      },
119
38.4M
      {
120
38.4M
          0.2500000000000000,
121
38.4M
          0.2206518106944235f,
122
38.4M
          0.0000000000000000,
123
38.4M
          0.0000000000000000,
124
38.4M
          -0.7071067811865474f,
125
38.4M
          0.6235485373547691f,
126
38.4M
          0.0000000000000000,
127
38.4M
          0.0000000000000000,
128
38.4M
          0.0000000000000000,
129
38.4M
          0.0000000000000000,
130
38.4M
          0.0000000000000000,
131
38.4M
          0.0000000000000000,
132
38.4M
          0.0000000000000000,
133
38.4M
          0.0000000000000000,
134
38.4M
          0.0000000000000000,
135
38.4M
          0.0000000000000000,
136
38.4M
      },
137
38.4M
      {
138
38.4M
          0.2500000000000000,
139
38.4M
          -0.1014005039375376f,
140
38.4M
          0.4067007583026075f,
141
38.4M
          -0.2125574805828875f,
142
38.4M
          0.0000000000000000,
143
38.4M
          -0.0643507165794627f,
144
38.4M
          -0.4517556589999482f,
145
38.4M
          -0.3046847507248690f,
146
38.4M
          0.3017929516615495f,
147
38.4M
          0.4082482904638627f,
148
38.4M
          0.1747866975480809f,
149
38.4M
          -0.2110560104933578f,
150
38.4M
          -0.1426608480880726f,
151
38.4M
          -0.1381354035075859f,
152
38.4M
          -0.1743760259965107f,
153
38.4M
          0.1135498731499434f,
154
38.4M
      },
155
38.4M
      {
156
38.4M
          0.2500000000000000,
157
38.4M
          -0.1014005039375375f,
158
38.4M
          0.4444481661973445f,
159
38.4M
          0.3085497062849767f,
160
38.4M
          0.0000000000000000f,
161
38.4M
          -0.0643507165794627f,
162
38.4M
          0.1585450355184006f,
163
38.4M
          0.5112616136591823f,
164
38.4M
          0.2579236279634118f,
165
38.4M
          0.0000000000000000,
166
38.4M
          0.0812611176717539f,
167
38.4M
          0.1856718091610980f,
168
38.4M
          -0.3416446842253372f,
169
38.4M
          0.3302282550303788f,
170
38.4M
          0.0702790691196284f,
171
38.4M
          -0.0741750459581035f,
172
38.4M
      },
173
38.4M
      {
174
38.4M
          0.2500000000000000,
175
38.4M
          0.2206518106944236f,
176
38.4M
          0.0000000000000000,
177
38.4M
          0.0000000000000000,
178
38.4M
          0.7071067811865476f,
179
38.4M
          0.6235485373547694f,
180
38.4M
          0.0000000000000000,
181
38.4M
          0.0000000000000000,
182
38.4M
          0.0000000000000000,
183
38.4M
          0.0000000000000000,
184
38.4M
          0.0000000000000000,
185
38.4M
          0.0000000000000000,
186
38.4M
          0.0000000000000000,
187
38.4M
          0.0000000000000000,
188
38.4M
          0.0000000000000000,
189
38.4M
          0.0000000000000000,
190
38.4M
      },
191
38.4M
      {
192
38.4M
          0.2500000000000000,
193
38.4M
          -0.1014005039375378f,
194
38.4M
          0.0000000000000000,
195
38.4M
          0.4706702258572536f,
196
38.4M
          0.0000000000000000,
197
38.4M
          -0.0643507165794628f,
198
38.4M
          -0.0403851516082220f,
199
38.4M
          0.0000000000000000,
200
38.4M
          0.1627234014286620f,
201
38.4M
          0.0000000000000000,
202
38.4M
          0.0000000000000000,
203
38.4M
          0.0000000000000000,
204
38.4M
          0.7367497537172237f,
205
38.4M
          0.0875511500058708f,
206
38.4M
          -0.2921026642334881f,
207
38.4M
          0.1940289303259434f,
208
38.4M
      },
209
38.4M
      {
210
38.4M
          0.2500000000000000,
211
38.4M
          -0.1014005039375377f,
212
38.4M
          0.1957439937204294f,
213
38.4M
          -0.1621205195722993f,
214
38.4M
          0.0000000000000000,
215
38.4M
          -0.0643507165794628f,
216
38.4M
          0.0074182263792424f,
217
38.4M
          -0.2904801297289980f,
218
38.4M
          0.0952002265347504f,
219
38.4M
          0.0000000000000000,
220
38.4M
          -0.3675398009862027f,
221
38.4M
          0.4921585901373873f,
222
38.4M
          0.2462710772207515f,
223
38.4M
          -0.0794670660590957f,
224
38.4M
          0.3623817333531167f,
225
38.4M
          -0.4351904965232280f,
226
38.4M
      },
227
38.4M
      {
228
38.4M
          0.2500000000000000,
229
38.4M
          -0.1014005039375376f,
230
38.4M
          0.2929100136981264f,
231
38.4M
          0.0000000000000000,
232
38.4M
          0.0000000000000000,
233
38.4M
          -0.0643507165794627f,
234
38.4M
          0.3935103426921017f,
235
38.4M
          -0.0657870154914280f,
236
38.4M
          0.0000000000000000,
237
38.4M
          -0.4082482904638628f,
238
38.4M
          -0.3078822139579090f,
239
38.4M
          -0.3852501370925192f,
240
38.4M
          -0.0857401903551931f,
241
38.4M
          -0.4613374887461511f,
242
38.4M
          0.0000000000000000,
243
38.4M
          0.2191868483885747f,
244
38.4M
      },
245
38.4M
      {
246
38.4M
          0.2500000000000000,
247
38.4M
          -0.1014005039375376f,
248
38.4M
          -0.4067007583026072f,
249
38.4M
          -0.2125574805828705f,
250
38.4M
          0.0000000000000000,
251
38.4M
          -0.0643507165794627f,
252
38.4M
          -0.4517556589999464f,
253
38.4M
          0.3046847507248840f,
254
38.4M
          0.3017929516615503f,
255
38.4M
          -0.4082482904638635f,
256
38.4M
          -0.1747866975480813f,
257
38.4M
          0.2110560104933581f,
258
38.4M
          -0.1426608480880734f,
259
38.4M
          -0.1381354035075829f,
260
38.4M
          -0.1743760259965108f,
261
38.4M
          0.1135498731499426f,
262
38.4M
      },
263
38.4M
      {
264
38.4M
          0.2500000000000000,
265
38.4M
          -0.1014005039375377f,
266
38.4M
          -0.1957439937204287f,
267
38.4M
          -0.1621205195722833f,
268
38.4M
          0.0000000000000000,
269
38.4M
          -0.0643507165794628f,
270
38.4M
          0.0074182263792444f,
271
38.4M
          0.2904801297290076f,
272
38.4M
          0.0952002265347505f,
273
38.4M
          0.0000000000000000,
274
38.4M
          0.3675398009862011f,
275
38.4M
          -0.4921585901373891f,
276
38.4M
          0.2462710772207514f,
277
38.4M
          -0.0794670660591026f,
278
38.4M
          0.3623817333531165f,
279
38.4M
          -0.4351904965232251f,
280
38.4M
      },
281
38.4M
      {
282
38.4M
          0.2500000000000000,
283
38.4M
          -0.1014005039375375f,
284
38.4M
          0.0000000000000000,
285
38.4M
          -0.4706702258572528f,
286
38.4M
          0.0000000000000000,
287
38.4M
          -0.0643507165794627f,
288
38.4M
          0.1107416575309343f,
289
38.4M
          0.0000000000000000,
290
38.4M
          -0.1627234014286617f,
291
38.4M
          0.0000000000000000,
292
38.4M
          0.0000000000000000,
293
38.4M
          0.0000000000000000,
294
38.4M
          0.1488339922711357f,
295
38.4M
          0.4972464710953509f,
296
38.4M
          0.2921026642334879f,
297
38.4M
          0.5550443808910661f,
298
38.4M
      },
299
38.4M
      {
300
38.4M
          0.2500000000000000,
301
38.4M
          -0.1014005039375377f,
302
38.4M
          0.1137907446044809f,
303
38.4M
          -0.1464291867126764f,
304
38.4M
          0.0000000000000000,
305
38.4M
          -0.0643507165794628f,
306
38.4M
          0.0829816309488205f,
307
38.4M
          -0.2388977352334460f,
308
38.4M
          -0.3531238544981630f,
309
38.4M
          -0.4082482904638630f,
310
38.4M
          0.4826689115059883f,
311
38.4M
          0.1741941265991622f,
312
38.4M
          -0.0476868035022925f,
313
38.4M
          0.1253805944856366f,
314
38.4M
          -0.4326608024727445f,
315
38.4M
          -0.2546827712406646f,
316
38.4M
      },
317
38.4M
      {
318
38.4M
          0.2500000000000000,
319
38.4M
          -0.1014005039375377f,
320
38.4M
          -0.4444481661973438f,
321
38.4M
          0.3085497062849487f,
322
38.4M
          0.0000000000000000,
323
38.4M
          -0.0643507165794628f,
324
38.4M
          0.1585450355183970f,
325
38.4M
          -0.5112616136592012f,
326
38.4M
          0.2579236279634129f,
327
38.4M
          0.0000000000000000,
328
38.4M
          -0.0812611176717504f,
329
38.4M
          -0.1856718091610990f,
330
38.4M
          -0.3416446842253373f,
331
38.4M
          0.3302282550303805f,
332
38.4M
          0.0702790691196282f,
333
38.4M
          -0.0741750459581023f,
334
38.4M
      },
335
38.4M
      {
336
38.4M
          0.2500000000000000,
337
38.4M
          -0.1014005039375376f,
338
38.4M
          -0.2929100136981264f,
339
38.4M
          0.0000000000000000,
340
38.4M
          0.0000000000000000,
341
38.4M
          -0.0643507165794627f,
342
38.4M
          0.3935103426921022f,
343
38.4M
          0.0657870154914254f,
344
38.4M
          0.0000000000000000,
345
38.4M
          0.4082482904638634f,
346
38.4M
          0.3078822139579031f,
347
38.4M
          0.3852501370925211f,
348
38.4M
          -0.0857401903551927f,
349
38.4M
          -0.4613374887461554f,
350
38.4M
          0.0000000000000000,
351
38.4M
          0.2191868483885728f,
352
38.4M
      },
353
38.4M
      {
354
38.4M
          0.2500000000000000,
355
38.4M
          -0.1014005039375376f,
356
38.4M
          -0.1137907446044814f,
357
38.4M
          -0.1464291867126654f,
358
38.4M
          0.0000000000000000,
359
38.4M
          -0.0643507165794627f,
360
38.4M
          0.0829816309488214f,
361
38.4M
          0.2388977352334547f,
362
38.4M
          -0.3531238544981624f,
363
38.4M
          0.4082482904638630f,
364
38.4M
          -0.4826689115059858f,
365
38.4M
          -0.1741941265991621f,
366
38.4M
          -0.0476868035022928f,
367
38.4M
          0.1253805944856431f,
368
38.4M
          -0.4326608024727457f,
369
38.4M
          -0.2546827712406641f,
370
38.4M
      },
371
38.4M
      {
372
38.4M
          0.2500000000000000,
373
38.4M
          -0.1014005039375374f,
374
38.4M
          0.0000000000000000,
375
38.4M
          0.4251149611657548f,
376
38.4M
          0.0000000000000000,
377
38.4M
          -0.0643507165794626f,
378
38.4M
          -0.4517556589999480f,
379
38.4M
          0.0000000000000000,
380
38.4M
          -0.6035859033230976f,
381
38.4M
          0.0000000000000000,
382
38.4M
          0.0000000000000000,
383
38.4M
          0.0000000000000000,
384
38.4M
          -0.1426608480880724f,
385
38.4M
          -0.1381354035075845f,
386
38.4M
          0.3487520519930227f,
387
38.4M
          0.1135498731499429f,
388
38.4M
      },
389
38.4M
  };
390
391
38.4M
  const HWY_CAPPED(float, 16) d;
392
115M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
76.9M
    auto scalar = Zero(d);
394
1.30G
    for (size_t j = 0; j < 16; j++) {
395
1.23G
      auto px = Set(d, pixels[j]);
396
1.23G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.23G
      scalar = MulAdd(px, basis, scalar);
398
1.23G
    }
399
76.9M
    Store(scalar, d, coeffs + i);
400
76.9M
  }
401
38.4M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
763k
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
763k
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
763k
      {
102
763k
          0.2500000000000000,
103
763k
          0.8769029297991420f,
104
763k
          0.0000000000000000,
105
763k
          0.0000000000000000,
106
763k
          0.0000000000000000,
107
763k
          -0.4105377591765233f,
108
763k
          0.0000000000000000,
109
763k
          0.0000000000000000,
110
763k
          0.0000000000000000,
111
763k
          0.0000000000000000,
112
763k
          0.0000000000000000,
113
763k
          0.0000000000000000,
114
763k
          0.0000000000000000,
115
763k
          0.0000000000000000,
116
763k
          0.0000000000000000,
117
763k
          0.0000000000000000,
118
763k
      },
119
763k
      {
120
763k
          0.2500000000000000,
121
763k
          0.2206518106944235f,
122
763k
          0.0000000000000000,
123
763k
          0.0000000000000000,
124
763k
          -0.7071067811865474f,
125
763k
          0.6235485373547691f,
126
763k
          0.0000000000000000,
127
763k
          0.0000000000000000,
128
763k
          0.0000000000000000,
129
763k
          0.0000000000000000,
130
763k
          0.0000000000000000,
131
763k
          0.0000000000000000,
132
763k
          0.0000000000000000,
133
763k
          0.0000000000000000,
134
763k
          0.0000000000000000,
135
763k
          0.0000000000000000,
136
763k
      },
137
763k
      {
138
763k
          0.2500000000000000,
139
763k
          -0.1014005039375376f,
140
763k
          0.4067007583026075f,
141
763k
          -0.2125574805828875f,
142
763k
          0.0000000000000000,
143
763k
          -0.0643507165794627f,
144
763k
          -0.4517556589999482f,
145
763k
          -0.3046847507248690f,
146
763k
          0.3017929516615495f,
147
763k
          0.4082482904638627f,
148
763k
          0.1747866975480809f,
149
763k
          -0.2110560104933578f,
150
763k
          -0.1426608480880726f,
151
763k
          -0.1381354035075859f,
152
763k
          -0.1743760259965107f,
153
763k
          0.1135498731499434f,
154
763k
      },
155
763k
      {
156
763k
          0.2500000000000000,
157
763k
          -0.1014005039375375f,
158
763k
          0.4444481661973445f,
159
763k
          0.3085497062849767f,
160
763k
          0.0000000000000000f,
161
763k
          -0.0643507165794627f,
162
763k
          0.1585450355184006f,
163
763k
          0.5112616136591823f,
164
763k
          0.2579236279634118f,
165
763k
          0.0000000000000000,
166
763k
          0.0812611176717539f,
167
763k
          0.1856718091610980f,
168
763k
          -0.3416446842253372f,
169
763k
          0.3302282550303788f,
170
763k
          0.0702790691196284f,
171
763k
          -0.0741750459581035f,
172
763k
      },
173
763k
      {
174
763k
          0.2500000000000000,
175
763k
          0.2206518106944236f,
176
763k
          0.0000000000000000,
177
763k
          0.0000000000000000,
178
763k
          0.7071067811865476f,
179
763k
          0.6235485373547694f,
180
763k
          0.0000000000000000,
181
763k
          0.0000000000000000,
182
763k
          0.0000000000000000,
183
763k
          0.0000000000000000,
184
763k
          0.0000000000000000,
185
763k
          0.0000000000000000,
186
763k
          0.0000000000000000,
187
763k
          0.0000000000000000,
188
763k
          0.0000000000000000,
189
763k
          0.0000000000000000,
190
763k
      },
191
763k
      {
192
763k
          0.2500000000000000,
193
763k
          -0.1014005039375378f,
194
763k
          0.0000000000000000,
195
763k
          0.4706702258572536f,
196
763k
          0.0000000000000000,
197
763k
          -0.0643507165794628f,
198
763k
          -0.0403851516082220f,
199
763k
          0.0000000000000000,
200
763k
          0.1627234014286620f,
201
763k
          0.0000000000000000,
202
763k
          0.0000000000000000,
203
763k
          0.0000000000000000,
204
763k
          0.7367497537172237f,
205
763k
          0.0875511500058708f,
206
763k
          -0.2921026642334881f,
207
763k
          0.1940289303259434f,
208
763k
      },
209
763k
      {
210
763k
          0.2500000000000000,
211
763k
          -0.1014005039375377f,
212
763k
          0.1957439937204294f,
213
763k
          -0.1621205195722993f,
214
763k
          0.0000000000000000,
215
763k
          -0.0643507165794628f,
216
763k
          0.0074182263792424f,
217
763k
          -0.2904801297289980f,
218
763k
          0.0952002265347504f,
219
763k
          0.0000000000000000,
220
763k
          -0.3675398009862027f,
221
763k
          0.4921585901373873f,
222
763k
          0.2462710772207515f,
223
763k
          -0.0794670660590957f,
224
763k
          0.3623817333531167f,
225
763k
          -0.4351904965232280f,
226
763k
      },
227
763k
      {
228
763k
          0.2500000000000000,
229
763k
          -0.1014005039375376f,
230
763k
          0.2929100136981264f,
231
763k
          0.0000000000000000,
232
763k
          0.0000000000000000,
233
763k
          -0.0643507165794627f,
234
763k
          0.3935103426921017f,
235
763k
          -0.0657870154914280f,
236
763k
          0.0000000000000000,
237
763k
          -0.4082482904638628f,
238
763k
          -0.3078822139579090f,
239
763k
          -0.3852501370925192f,
240
763k
          -0.0857401903551931f,
241
763k
          -0.4613374887461511f,
242
763k
          0.0000000000000000,
243
763k
          0.2191868483885747f,
244
763k
      },
245
763k
      {
246
763k
          0.2500000000000000,
247
763k
          -0.1014005039375376f,
248
763k
          -0.4067007583026072f,
249
763k
          -0.2125574805828705f,
250
763k
          0.0000000000000000,
251
763k
          -0.0643507165794627f,
252
763k
          -0.4517556589999464f,
253
763k
          0.3046847507248840f,
254
763k
          0.3017929516615503f,
255
763k
          -0.4082482904638635f,
256
763k
          -0.1747866975480813f,
257
763k
          0.2110560104933581f,
258
763k
          -0.1426608480880734f,
259
763k
          -0.1381354035075829f,
260
763k
          -0.1743760259965108f,
261
763k
          0.1135498731499426f,
262
763k
      },
263
763k
      {
264
763k
          0.2500000000000000,
265
763k
          -0.1014005039375377f,
266
763k
          -0.1957439937204287f,
267
763k
          -0.1621205195722833f,
268
763k
          0.0000000000000000,
269
763k
          -0.0643507165794628f,
270
763k
          0.0074182263792444f,
271
763k
          0.2904801297290076f,
272
763k
          0.0952002265347505f,
273
763k
          0.0000000000000000,
274
763k
          0.3675398009862011f,
275
763k
          -0.4921585901373891f,
276
763k
          0.2462710772207514f,
277
763k
          -0.0794670660591026f,
278
763k
          0.3623817333531165f,
279
763k
          -0.4351904965232251f,
280
763k
      },
281
763k
      {
282
763k
          0.2500000000000000,
283
763k
          -0.1014005039375375f,
284
763k
          0.0000000000000000,
285
763k
          -0.4706702258572528f,
286
763k
          0.0000000000000000,
287
763k
          -0.0643507165794627f,
288
763k
          0.1107416575309343f,
289
763k
          0.0000000000000000,
290
763k
          -0.1627234014286617f,
291
763k
          0.0000000000000000,
292
763k
          0.0000000000000000,
293
763k
          0.0000000000000000,
294
763k
          0.1488339922711357f,
295
763k
          0.4972464710953509f,
296
763k
          0.2921026642334879f,
297
763k
          0.5550443808910661f,
298
763k
      },
299
763k
      {
300
763k
          0.2500000000000000,
301
763k
          -0.1014005039375377f,
302
763k
          0.1137907446044809f,
303
763k
          -0.1464291867126764f,
304
763k
          0.0000000000000000,
305
763k
          -0.0643507165794628f,
306
763k
          0.0829816309488205f,
307
763k
          -0.2388977352334460f,
308
763k
          -0.3531238544981630f,
309
763k
          -0.4082482904638630f,
310
763k
          0.4826689115059883f,
311
763k
          0.1741941265991622f,
312
763k
          -0.0476868035022925f,
313
763k
          0.1253805944856366f,
314
763k
          -0.4326608024727445f,
315
763k
          -0.2546827712406646f,
316
763k
      },
317
763k
      {
318
763k
          0.2500000000000000,
319
763k
          -0.1014005039375377f,
320
763k
          -0.4444481661973438f,
321
763k
          0.3085497062849487f,
322
763k
          0.0000000000000000,
323
763k
          -0.0643507165794628f,
324
763k
          0.1585450355183970f,
325
763k
          -0.5112616136592012f,
326
763k
          0.2579236279634129f,
327
763k
          0.0000000000000000,
328
763k
          -0.0812611176717504f,
329
763k
          -0.1856718091610990f,
330
763k
          -0.3416446842253373f,
331
763k
          0.3302282550303805f,
332
763k
          0.0702790691196282f,
333
763k
          -0.0741750459581023f,
334
763k
      },
335
763k
      {
336
763k
          0.2500000000000000,
337
763k
          -0.1014005039375376f,
338
763k
          -0.2929100136981264f,
339
763k
          0.0000000000000000,
340
763k
          0.0000000000000000,
341
763k
          -0.0643507165794627f,
342
763k
          0.3935103426921022f,
343
763k
          0.0657870154914254f,
344
763k
          0.0000000000000000,
345
763k
          0.4082482904638634f,
346
763k
          0.3078822139579031f,
347
763k
          0.3852501370925211f,
348
763k
          -0.0857401903551927f,
349
763k
          -0.4613374887461554f,
350
763k
          0.0000000000000000,
351
763k
          0.2191868483885728f,
352
763k
      },
353
763k
      {
354
763k
          0.2500000000000000,
355
763k
          -0.1014005039375376f,
356
763k
          -0.1137907446044814f,
357
763k
          -0.1464291867126654f,
358
763k
          0.0000000000000000,
359
763k
          -0.0643507165794627f,
360
763k
          0.0829816309488214f,
361
763k
          0.2388977352334547f,
362
763k
          -0.3531238544981624f,
363
763k
          0.4082482904638630f,
364
763k
          -0.4826689115059858f,
365
763k
          -0.1741941265991621f,
366
763k
          -0.0476868035022928f,
367
763k
          0.1253805944856431f,
368
763k
          -0.4326608024727457f,
369
763k
          -0.2546827712406641f,
370
763k
      },
371
763k
      {
372
763k
          0.2500000000000000,
373
763k
          -0.1014005039375374f,
374
763k
          0.0000000000000000,
375
763k
          0.4251149611657548f,
376
763k
          0.0000000000000000,
377
763k
          -0.0643507165794626f,
378
763k
          -0.4517556589999480f,
379
763k
          0.0000000000000000,
380
763k
          -0.6035859033230976f,
381
763k
          0.0000000000000000,
382
763k
          0.0000000000000000,
383
763k
          0.0000000000000000,
384
763k
          -0.1426608480880724f,
385
763k
          -0.1381354035075845f,
386
763k
          0.3487520519930227f,
387
763k
          0.1135498731499429f,
388
763k
      },
389
763k
  };
390
391
763k
  const HWY_CAPPED(float, 16) d;
392
2.29M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
1.52M
    auto scalar = Zero(d);
394
25.9M
    for (size_t j = 0; j < 16; j++) {
395
24.4M
      auto px = Set(d, pixels[j]);
396
24.4M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
24.4M
      scalar = MulAdd(px, basis, scalar);
398
24.4M
    }
399
1.52M
    Store(scalar, d, coeffs + i);
400
1.52M
  }
401
763k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
40.0M
                            float* JXL_RESTRICT coefficients) {
411
40.0M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
40.0M
  size_t afv_x = afv_kind & 1;
413
40.0M
  size_t afv_y = afv_kind / 2;
414
40.0M
  HWY_ALIGN float block[4 * 8] = {};
415
200M
  for (size_t iy = 0; iy < 4; iy++) {
416
800M
    for (size_t ix = 0; ix < 4; ix++) {
417
640M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
640M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
640M
    }
420
160M
  }
421
  // AFV coefficients in (even, even) positions.
422
40.0M
  HWY_ALIGN float coeff[4 * 4];
423
40.0M
  AFVDCT4x4(block, coeff);
424
200M
  for (size_t iy = 0; iy < 4; iy++) {
425
800M
    for (size_t ix = 0; ix < 4; ix++) {
426
640M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
640M
    }
428
160M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
40.0M
  ComputeScaledDCT<4, 4>()(
431
40.0M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
40.0M
              pixels_stride),
433
40.0M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
200M
  for (size_t iy = 0; iy < 4; iy++) {
436
1.44G
    for (size_t ix = 0; ix < 8; ix++) {
437
1.28G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
1.28G
    }
439
160M
  }
440
  // 4x8 DCT of the other half of the block.
441
40.0M
  ComputeScaledDCT<4, 8>()(
442
40.0M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
40.0M
      block, scratch_space);
444
200M
  for (size_t iy = 0; iy < 4; iy++) {
445
1.44G
    for (size_t ix = 0; ix < 8; ix++) {
446
1.28G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
1.28G
    }
448
160M
  }
449
40.0M
  float block00 = coefficients[0] * 0.25f;
450
40.0M
  float block01 = coefficients[1];
451
40.0M
  float block10 = coefficients[8];
452
40.0M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
40.0M
  coefficients[1] = (block00 - block01) * 0.5f;
454
40.0M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
40.0M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
196k
                            float* JXL_RESTRICT coefficients) {
411
196k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
196k
  size_t afv_x = afv_kind & 1;
413
196k
  size_t afv_y = afv_kind / 2;
414
196k
  HWY_ALIGN float block[4 * 8] = {};
415
981k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.14M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.14M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.14M
    }
420
785k
  }
421
  // AFV coefficients in (even, even) positions.
422
196k
  HWY_ALIGN float coeff[4 * 4];
423
196k
  AFVDCT4x4(block, coeff);
424
981k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.14M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.14M
    }
428
785k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
196k
  ComputeScaledDCT<4, 4>()(
431
196k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
196k
              pixels_stride),
433
196k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
981k
  for (size_t iy = 0; iy < 4; iy++) {
436
7.06M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.28M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.28M
    }
439
785k
  }
440
  // 4x8 DCT of the other half of the block.
441
196k
  ComputeScaledDCT<4, 8>()(
442
196k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
196k
      block, scratch_space);
444
981k
  for (size_t iy = 0; iy < 4; iy++) {
445
7.06M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.28M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.28M
    }
448
785k
  }
449
196k
  float block00 = coefficients[0] * 0.25f;
450
196k
  float block01 = coefficients[1];
451
196k
  float block10 = coefficients[8];
452
196k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
196k
  coefficients[1] = (block00 - block01) * 0.5f;
454
196k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
196k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
160k
                            float* JXL_RESTRICT coefficients) {
411
160k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
160k
  size_t afv_x = afv_kind & 1;
413
160k
  size_t afv_y = afv_kind / 2;
414
160k
  HWY_ALIGN float block[4 * 8] = {};
415
801k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.20M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.56M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.56M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.56M
    }
420
641k
  }
421
  // AFV coefficients in (even, even) positions.
422
160k
  HWY_ALIGN float coeff[4 * 4];
423
160k
  AFVDCT4x4(block, coeff);
424
801k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.20M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.56M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.56M
    }
428
641k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
160k
  ComputeScaledDCT<4, 4>()(
431
160k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
160k
              pixels_stride),
433
160k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
801k
  for (size_t iy = 0; iy < 4; iy++) {
436
5.77M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.13M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.13M
    }
439
641k
  }
440
  // 4x8 DCT of the other half of the block.
441
160k
  ComputeScaledDCT<4, 8>()(
442
160k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
160k
      block, scratch_space);
444
801k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.77M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.13M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.13M
    }
448
641k
  }
449
160k
  float block00 = coefficients[0] * 0.25f;
450
160k
  float block01 = coefficients[1];
451
160k
  float block10 = coefficients[8];
452
160k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
160k
  coefficients[1] = (block00 - block01) * 0.5f;
454
160k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
160k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
187k
                            float* JXL_RESTRICT coefficients) {
411
187k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
187k
  size_t afv_x = afv_kind & 1;
413
187k
  size_t afv_y = afv_kind / 2;
414
187k
  HWY_ALIGN float block[4 * 8] = {};
415
937k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.74M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.99M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.99M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.99M
    }
420
749k
  }
421
  // AFV coefficients in (even, even) positions.
422
187k
  HWY_ALIGN float coeff[4 * 4];
423
187k
  AFVDCT4x4(block, coeff);
424
937k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.74M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.99M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.99M
    }
428
749k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
187k
  ComputeScaledDCT<4, 4>()(
431
187k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
187k
              pixels_stride),
433
187k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
937k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.74M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.99M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.99M
    }
439
749k
  }
440
  // 4x8 DCT of the other half of the block.
441
187k
  ComputeScaledDCT<4, 8>()(
442
187k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
187k
      block, scratch_space);
444
937k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.74M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.99M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.99M
    }
448
749k
  }
449
187k
  float block00 = coefficients[0] * 0.25f;
450
187k
  float block01 = coefficients[1];
451
187k
  float block10 = coefficients[8];
452
187k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
187k
  coefficients[1] = (block00 - block01) * 0.5f;
454
187k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
187k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
219k
                            float* JXL_RESTRICT coefficients) {
411
219k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
219k
  size_t afv_x = afv_kind & 1;
413
219k
  size_t afv_y = afv_kind / 2;
414
219k
  HWY_ALIGN float block[4 * 8] = {};
415
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.50M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.50M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.50M
    }
420
876k
  }
421
  // AFV coefficients in (even, even) positions.
422
219k
  HWY_ALIGN float coeff[4 * 4];
423
219k
  AFVDCT4x4(block, coeff);
424
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.50M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.50M
    }
428
876k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
219k
  ComputeScaledDCT<4, 4>()(
431
219k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
219k
              pixels_stride),
433
219k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.89M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.01M
    }
439
876k
  }
440
  // 4x8 DCT of the other half of the block.
441
219k
  ComputeScaledDCT<4, 8>()(
442
219k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
219k
      block, scratch_space);
444
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.89M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.01M
    }
448
876k
  }
449
219k
  float block00 = coefficients[0] * 0.25f;
450
219k
  float block01 = coefficients[1];
451
219k
  float block10 = coefficients[8];
452
219k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
219k
  coefficients[1] = (block00 - block01) * 0.5f;
454
219k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
219k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
9.62M
                            float* JXL_RESTRICT coefficients) {
411
9.62M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
9.62M
  size_t afv_x = afv_kind & 1;
413
9.62M
  size_t afv_y = afv_kind / 2;
414
9.62M
  HWY_ALIGN float block[4 * 8] = {};
415
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
192M
    for (size_t ix = 0; ix < 4; ix++) {
417
153M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
153M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
153M
    }
420
38.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
9.62M
  HWY_ALIGN float coeff[4 * 4];
423
9.62M
  AFVDCT4x4(block, coeff);
424
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
192M
    for (size_t ix = 0; ix < 4; ix++) {
426
153M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
153M
    }
428
38.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
9.62M
  ComputeScaledDCT<4, 4>()(
431
9.62M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
9.62M
              pixels_stride),
433
9.62M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
346M
    for (size_t ix = 0; ix < 8; ix++) {
437
307M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
307M
    }
439
38.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
9.62M
  ComputeScaledDCT<4, 8>()(
442
9.62M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
9.62M
      block, scratch_space);
444
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
346M
    for (size_t ix = 0; ix < 8; ix++) {
446
307M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
307M
    }
448
38.4M
  }
449
9.62M
  float block00 = coefficients[0] * 0.25f;
450
9.62M
  float block01 = coefficients[1];
451
9.62M
  float block10 = coefficients[8];
452
9.62M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
9.62M
  coefficients[1] = (block00 - block01) * 0.5f;
454
9.62M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
9.62M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
9.62M
                            float* JXL_RESTRICT coefficients) {
411
9.62M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
9.62M
  size_t afv_x = afv_kind & 1;
413
9.62M
  size_t afv_y = afv_kind / 2;
414
9.62M
  HWY_ALIGN float block[4 * 8] = {};
415
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
192M
    for (size_t ix = 0; ix < 4; ix++) {
417
153M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
153M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
153M
    }
420
38.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
9.62M
  HWY_ALIGN float coeff[4 * 4];
423
9.62M
  AFVDCT4x4(block, coeff);
424
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
192M
    for (size_t ix = 0; ix < 4; ix++) {
426
153M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
153M
    }
428
38.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
9.62M
  ComputeScaledDCT<4, 4>()(
431
9.62M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
9.62M
              pixels_stride),
433
9.62M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
346M
    for (size_t ix = 0; ix < 8; ix++) {
437
307M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
307M
    }
439
38.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
9.62M
  ComputeScaledDCT<4, 8>()(
442
9.62M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
9.62M
      block, scratch_space);
444
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
346M
    for (size_t ix = 0; ix < 8; ix++) {
446
307M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
307M
    }
448
38.4M
  }
449
9.62M
  float block00 = coefficients[0] * 0.25f;
450
9.62M
  float block01 = coefficients[1];
451
9.62M
  float block10 = coefficients[8];
452
9.62M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
9.62M
  coefficients[1] = (block00 - block01) * 0.5f;
454
9.62M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
9.62M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
9.62M
                            float* JXL_RESTRICT coefficients) {
411
9.62M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
9.62M
  size_t afv_x = afv_kind & 1;
413
9.62M
  size_t afv_y = afv_kind / 2;
414
9.62M
  HWY_ALIGN float block[4 * 8] = {};
415
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
192M
    for (size_t ix = 0; ix < 4; ix++) {
417
153M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
153M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
153M
    }
420
38.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
9.62M
  HWY_ALIGN float coeff[4 * 4];
423
9.62M
  AFVDCT4x4(block, coeff);
424
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
192M
    for (size_t ix = 0; ix < 4; ix++) {
426
153M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
153M
    }
428
38.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
9.62M
  ComputeScaledDCT<4, 4>()(
431
9.62M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
9.62M
              pixels_stride),
433
9.62M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
346M
    for (size_t ix = 0; ix < 8; ix++) {
437
307M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
307M
    }
439
38.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
9.62M
  ComputeScaledDCT<4, 8>()(
442
9.62M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
9.62M
      block, scratch_space);
444
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
346M
    for (size_t ix = 0; ix < 8; ix++) {
446
307M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
307M
    }
448
38.4M
  }
449
9.62M
  float block00 = coefficients[0] * 0.25f;
450
9.62M
  float block01 = coefficients[1];
451
9.62M
  float block10 = coefficients[8];
452
9.62M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
9.62M
  coefficients[1] = (block00 - block01) * 0.5f;
454
9.62M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
9.62M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
9.62M
                            float* JXL_RESTRICT coefficients) {
411
9.62M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
9.62M
  size_t afv_x = afv_kind & 1;
413
9.62M
  size_t afv_y = afv_kind / 2;
414
9.62M
  HWY_ALIGN float block[4 * 8] = {};
415
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
416
192M
    for (size_t ix = 0; ix < 4; ix++) {
417
153M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
153M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
153M
    }
420
38.4M
  }
421
  // AFV coefficients in (even, even) positions.
422
9.62M
  HWY_ALIGN float coeff[4 * 4];
423
9.62M
  AFVDCT4x4(block, coeff);
424
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
425
192M
    for (size_t ix = 0; ix < 4; ix++) {
426
153M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
153M
    }
428
38.4M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
9.62M
  ComputeScaledDCT<4, 4>()(
431
9.62M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
9.62M
              pixels_stride),
433
9.62M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
436
346M
    for (size_t ix = 0; ix < 8; ix++) {
437
307M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
307M
    }
439
38.4M
  }
440
  // 4x8 DCT of the other half of the block.
441
9.62M
  ComputeScaledDCT<4, 8>()(
442
9.62M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
9.62M
      block, scratch_space);
444
48.1M
  for (size_t iy = 0; iy < 4; iy++) {
445
346M
    for (size_t ix = 0; ix < 8; ix++) {
446
307M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
307M
    }
448
38.4M
  }
449
9.62M
  float block00 = coefficients[0] * 0.25f;
450
9.62M
  float block01 = coefficients[1];
451
9.62M
  float block10 = coefficients[8];
452
9.62M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
9.62M
  coefficients[1] = (block00 - block01) * 0.5f;
454
9.62M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
9.62M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
196k
                            float* JXL_RESTRICT coefficients) {
411
196k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
196k
  size_t afv_x = afv_kind & 1;
413
196k
  size_t afv_y = afv_kind / 2;
414
196k
  HWY_ALIGN float block[4 * 8] = {};
415
981k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.14M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.14M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.14M
    }
420
785k
  }
421
  // AFV coefficients in (even, even) positions.
422
196k
  HWY_ALIGN float coeff[4 * 4];
423
196k
  AFVDCT4x4(block, coeff);
424
981k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.14M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.14M
    }
428
785k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
196k
  ComputeScaledDCT<4, 4>()(
431
196k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
196k
              pixels_stride),
433
196k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
981k
  for (size_t iy = 0; iy < 4; iy++) {
436
7.06M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.28M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.28M
    }
439
785k
  }
440
  // 4x8 DCT of the other half of the block.
441
196k
  ComputeScaledDCT<4, 8>()(
442
196k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
196k
      block, scratch_space);
444
981k
  for (size_t iy = 0; iy < 4; iy++) {
445
7.06M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.28M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.28M
    }
448
785k
  }
449
196k
  float block00 = coefficients[0] * 0.25f;
450
196k
  float block01 = coefficients[1];
451
196k
  float block10 = coefficients[8];
452
196k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
196k
  coefficients[1] = (block00 - block01) * 0.5f;
454
196k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
196k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
160k
                            float* JXL_RESTRICT coefficients) {
411
160k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
160k
  size_t afv_x = afv_kind & 1;
413
160k
  size_t afv_y = afv_kind / 2;
414
160k
  HWY_ALIGN float block[4 * 8] = {};
415
801k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.20M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.56M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.56M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.56M
    }
420
641k
  }
421
  // AFV coefficients in (even, even) positions.
422
160k
  HWY_ALIGN float coeff[4 * 4];
423
160k
  AFVDCT4x4(block, coeff);
424
801k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.20M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.56M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.56M
    }
428
641k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
160k
  ComputeScaledDCT<4, 4>()(
431
160k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
160k
              pixels_stride),
433
160k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
801k
  for (size_t iy = 0; iy < 4; iy++) {
436
5.77M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.13M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.13M
    }
439
641k
  }
440
  // 4x8 DCT of the other half of the block.
441
160k
  ComputeScaledDCT<4, 8>()(
442
160k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
160k
      block, scratch_space);
444
801k
  for (size_t iy = 0; iy < 4; iy++) {
445
5.77M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.13M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.13M
    }
448
641k
  }
449
160k
  float block00 = coefficients[0] * 0.25f;
450
160k
  float block01 = coefficients[1];
451
160k
  float block10 = coefficients[8];
452
160k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
160k
  coefficients[1] = (block00 - block01) * 0.5f;
454
160k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
160k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
187k
                            float* JXL_RESTRICT coefficients) {
411
187k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
187k
  size_t afv_x = afv_kind & 1;
413
187k
  size_t afv_y = afv_kind / 2;
414
187k
  HWY_ALIGN float block[4 * 8] = {};
415
937k
  for (size_t iy = 0; iy < 4; iy++) {
416
3.74M
    for (size_t ix = 0; ix < 4; ix++) {
417
2.99M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
2.99M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
2.99M
    }
420
749k
  }
421
  // AFV coefficients in (even, even) positions.
422
187k
  HWY_ALIGN float coeff[4 * 4];
423
187k
  AFVDCT4x4(block, coeff);
424
937k
  for (size_t iy = 0; iy < 4; iy++) {
425
3.74M
    for (size_t ix = 0; ix < 4; ix++) {
426
2.99M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
2.99M
    }
428
749k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
187k
  ComputeScaledDCT<4, 4>()(
431
187k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
187k
              pixels_stride),
433
187k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
937k
  for (size_t iy = 0; iy < 4; iy++) {
436
6.74M
    for (size_t ix = 0; ix < 8; ix++) {
437
5.99M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
5.99M
    }
439
749k
  }
440
  // 4x8 DCT of the other half of the block.
441
187k
  ComputeScaledDCT<4, 8>()(
442
187k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
187k
      block, scratch_space);
444
937k
  for (size_t iy = 0; iy < 4; iy++) {
445
6.74M
    for (size_t ix = 0; ix < 8; ix++) {
446
5.99M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
5.99M
    }
448
749k
  }
449
187k
  float block00 = coefficients[0] * 0.25f;
450
187k
  float block01 = coefficients[1];
451
187k
  float block10 = coefficients[8];
452
187k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
187k
  coefficients[1] = (block00 - block01) * 0.5f;
454
187k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
187k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
219k
                            float* JXL_RESTRICT coefficients) {
411
219k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
219k
  size_t afv_x = afv_kind & 1;
413
219k
  size_t afv_y = afv_kind / 2;
414
219k
  HWY_ALIGN float block[4 * 8] = {};
415
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.50M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.50M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.50M
    }
420
876k
  }
421
  // AFV coefficients in (even, even) positions.
422
219k
  HWY_ALIGN float coeff[4 * 4];
423
219k
  AFVDCT4x4(block, coeff);
424
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.38M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.50M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.50M
    }
428
876k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
219k
  ComputeScaledDCT<4, 4>()(
431
219k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
219k
              pixels_stride),
433
219k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.89M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.01M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.01M
    }
439
876k
  }
440
  // 4x8 DCT of the other half of the block.
441
219k
  ComputeScaledDCT<4, 8>()(
442
219k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
219k
      block, scratch_space);
444
1.09M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.89M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.01M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.01M
    }
448
876k
  }
449
219k
  float block00 = coefficients[0] * 0.25f;
450
219k
  float block01 = coefficients[1];
451
219k
  float block10 = coefficients[8];
452
219k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
219k
  coefficients[1] = (block00 - block01) * 0.5f;
454
219k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
219k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
141M
                                          float* JXL_RESTRICT scratch_space) {
462
141M
  using Type = AcStrategyType;
463
141M
  switch (strategy) {
464
11.3M
    case Type::IDENTITY: {
465
34.0M
      for (size_t y = 0; y < 2; y++) {
466
68.1M
        for (size_t x = 0; x < 2; x++) {
467
45.4M
          float block_dc = 0;
468
227M
          for (size_t iy = 0; iy < 4; iy++) {
469
909M
            for (size_t ix = 0; ix < 4; ix++) {
470
727M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
727M
            }
472
181M
          }
473
45.4M
          block_dc *= 1.0f / 16;
474
227M
          for (size_t iy = 0; iy < 4; iy++) {
475
909M
            for (size_t ix = 0; ix < 4; ix++) {
476
727M
              if (ix == 1 && iy == 1) continue;
477
681M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
681M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
681M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
681M
            }
481
181M
          }
482
45.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
45.4M
          coefficients[y * 8 + x] = block_dc;
484
45.4M
        }
485
22.7M
      }
486
11.3M
      float block00 = coefficients[0];
487
11.3M
      float block01 = coefficients[1];
488
11.3M
      float block10 = coefficients[8];
489
11.3M
      float block11 = coefficients[9];
490
11.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
11.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
11.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
11.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
11.3M
      break;
495
0
    }
496
10.1M
    case Type::DCT8X4: {
497
30.5M
      for (size_t x = 0; x < 2; x++) {
498
20.3M
        HWY_ALIGN float block[4 * 8];
499
20.3M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
20.3M
                                 scratch_space);
501
101M
        for (size_t iy = 0; iy < 4; iy++) {
502
733M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
651M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
651M
          }
506
81.4M
        }
507
20.3M
      }
508
10.1M
      float block0 = coefficients[0];
509
10.1M
      float block1 = coefficients[8];
510
10.1M
      coefficients[0] = (block0 + block1) * 0.5f;
511
10.1M
      coefficients[8] = (block0 - block1) * 0.5f;
512
10.1M
      break;
513
0
    }
514
9.87M
    case Type::DCT4X8: {
515
29.6M
      for (size_t y = 0; y < 2; y++) {
516
19.7M
        HWY_ALIGN float block[4 * 8];
517
19.7M
        ComputeScaledDCT<4, 8>()(
518
19.7M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
19.7M
            scratch_space);
520
98.7M
        for (size_t iy = 0; iy < 4; iy++) {
521
711M
          for (size_t ix = 0; ix < 8; ix++) {
522
632M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
632M
          }
524
79.0M
        }
525
19.7M
      }
526
9.87M
      float block0 = coefficients[0];
527
9.87M
      float block1 = coefficients[8];
528
9.87M
      coefficients[0] = (block0 + block1) * 0.5f;
529
9.87M
      coefficients[8] = (block0 - block1) * 0.5f;
530
9.87M
      break;
531
0
    }
532
9.62M
    case Type::DCT4X4: {
533
28.8M
      for (size_t y = 0; y < 2; y++) {
534
57.7M
        for (size_t x = 0; x < 2; x++) {
535
38.4M
          HWY_ALIGN float block[4 * 4];
536
38.4M
          ComputeScaledDCT<4, 4>()(
537
38.4M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
38.4M
              block, scratch_space);
539
192M
          for (size_t iy = 0; iy < 4; iy++) {
540
769M
            for (size_t ix = 0; ix < 4; ix++) {
541
615M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
615M
            }
543
153M
          }
544
38.4M
        }
545
19.2M
      }
546
9.62M
      float block00 = coefficients[0];
547
9.62M
      float block01 = coefficients[1];
548
9.62M
      float block10 = coefficients[8];
549
9.62M
      float block11 = coefficients[9];
550
9.62M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
9.62M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
9.62M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
9.62M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
9.62M
      break;
555
0
    }
556
11.4M
    case Type::DCT2X2: {
557
11.4M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
11.4M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
11.4M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
11.4M
      break;
561
0
    }
562
4.44M
    case Type::DCT16X16: {
563
4.44M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.44M
                                 scratch_space);
565
4.44M
      break;
566
0
    }
567
8.52M
    case Type::DCT16X8: {
568
8.52M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
8.52M
                                scratch_space);
570
8.52M
      break;
571
0
    }
572
8.63M
    case Type::DCT8X16: {
573
8.63M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
8.63M
                                scratch_space);
575
8.63M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.70M
    case Type::DCT32X16: {
588
1.70M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.70M
                                 scratch_space);
590
1.70M
      break;
591
0
    }
592
1.77M
    case Type::DCT16X32: {
593
1.77M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.77M
                                 scratch_space);
595
1.77M
      break;
596
0
    }
597
925k
    case Type::DCT32X32: {
598
925k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
925k
                                 scratch_space);
600
925k
      break;
601
0
    }
602
22.2M
    case Type::DCT: {
603
22.2M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
22.2M
                               scratch_space);
605
22.2M
      break;
606
0
    }
607
10.0M
    case Type::AFV0: {
608
10.0M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
10.0M
      break;
610
0
    }
611
9.94M
    case Type::AFV1: {
612
9.94M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
9.94M
      break;
614
0
    }
615
9.99M
    case Type::AFV2: {
616
9.99M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
9.99M
      break;
618
0
    }
619
10.0M
    case Type::AFV3: {
620
10.0M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
10.0M
      break;
622
0
    }
623
176k
    case Type::DCT64X64: {
624
176k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
176k
                                 scratch_space);
626
176k
      break;
627
0
    }
628
513k
    case Type::DCT64X32: {
629
513k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
513k
                                 scratch_space);
631
513k
      break;
632
0
    }
633
361k
    case Type::DCT32X64: {
634
361k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
361k
                                 scratch_space);
636
361k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
141M
  }
669
141M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
5.35M
                                          float* JXL_RESTRICT scratch_space) {
462
5.35M
  using Type = AcStrategyType;
463
5.35M
  switch (strategy) {
464
871k
    case Type::IDENTITY: {
465
2.61M
      for (size_t y = 0; y < 2; y++) {
466
5.22M
        for (size_t x = 0; x < 2; x++) {
467
3.48M
          float block_dc = 0;
468
17.4M
          for (size_t iy = 0; iy < 4; iy++) {
469
69.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
55.7M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
55.7M
            }
472
13.9M
          }
473
3.48M
          block_dc *= 1.0f / 16;
474
17.4M
          for (size_t iy = 0; iy < 4; iy++) {
475
69.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
55.7M
              if (ix == 1 && iy == 1) continue;
477
52.2M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
52.2M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
52.2M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
52.2M
            }
481
13.9M
          }
482
3.48M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.48M
          coefficients[y * 8 + x] = block_dc;
484
3.48M
        }
485
1.74M
      }
486
871k
      float block00 = coefficients[0];
487
871k
      float block01 = coefficients[1];
488
871k
      float block10 = coefficients[8];
489
871k
      float block11 = coefficients[9];
490
871k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
871k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
871k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
871k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
871k
      break;
495
0
    }
496
281k
    case Type::DCT8X4: {
497
845k
      for (size_t x = 0; x < 2; x++) {
498
563k
        HWY_ALIGN float block[4 * 8];
499
563k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
563k
                                 scratch_space);
501
2.81M
        for (size_t iy = 0; iy < 4; iy++) {
502
20.2M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
18.0M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
18.0M
          }
506
2.25M
        }
507
563k
      }
508
281k
      float block0 = coefficients[0];
509
281k
      float block1 = coefficients[8];
510
281k
      coefficients[0] = (block0 + block1) * 0.5f;
511
281k
      coefficients[8] = (block0 - block1) * 0.5f;
512
281k
      break;
513
0
    }
514
129k
    case Type::DCT4X8: {
515
388k
      for (size_t y = 0; y < 2; y++) {
516
258k
        HWY_ALIGN float block[4 * 8];
517
258k
        ComputeScaledDCT<4, 8>()(
518
258k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
258k
            scratch_space);
520
1.29M
        for (size_t iy = 0; iy < 4; iy++) {
521
9.32M
          for (size_t ix = 0; ix < 8; ix++) {
522
8.28M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
8.28M
          }
524
1.03M
        }
525
258k
      }
526
129k
      float block0 = coefficients[0];
527
129k
      float block1 = coefficients[8];
528
129k
      coefficients[0] = (block0 + block1) * 0.5f;
529
129k
      coefficients[8] = (block0 - block1) * 0.5f;
530
129k
      break;
531
0
    }
532
1.70k
    case Type::DCT4X4: {
533
5.12k
      for (size_t y = 0; y < 2; y++) {
534
10.2k
        for (size_t x = 0; x < 2; x++) {
535
6.82k
          HWY_ALIGN float block[4 * 4];
536
6.82k
          ComputeScaledDCT<4, 4>()(
537
6.82k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
6.82k
              block, scratch_space);
539
34.1k
          for (size_t iy = 0; iy < 4; iy++) {
540
136k
            for (size_t ix = 0; ix < 4; ix++) {
541
109k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
109k
            }
543
27.3k
          }
544
6.82k
        }
545
3.41k
      }
546
1.70k
      float block00 = coefficients[0];
547
1.70k
      float block01 = coefficients[1];
548
1.70k
      float block10 = coefficients[8];
549
1.70k
      float block11 = coefficients[9];
550
1.70k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.70k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.70k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.70k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.70k
      break;
555
0
    }
556
899k
    case Type::DCT2X2: {
557
899k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
899k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
899k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
899k
      break;
561
0
    }
562
160k
    case Type::DCT16X16: {
563
160k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
160k
                                 scratch_space);
565
160k
      break;
566
0
    }
567
258k
    case Type::DCT16X8: {
568
258k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
258k
                                scratch_space);
570
258k
      break;
571
0
    }
572
303k
    case Type::DCT8X16: {
573
303k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
303k
                                scratch_space);
575
303k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
44.9k
    case Type::DCT32X16: {
588
44.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
44.9k
                                 scratch_space);
590
44.9k
      break;
591
0
    }
592
74.9k
    case Type::DCT16X32: {
593
74.9k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
74.9k
                                 scratch_space);
595
74.9k
      break;
596
0
    }
597
45.5k
    case Type::DCT32X32: {
598
45.5k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
45.5k
                                 scratch_space);
600
45.5k
      break;
601
0
    }
602
1.48M
    case Type::DCT: {
603
1.48M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
1.48M
                               scratch_space);
605
1.48M
      break;
606
0
    }
607
196k
    case Type::AFV0: {
608
196k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
196k
      break;
610
0
    }
611
160k
    case Type::AFV1: {
612
160k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
160k
      break;
614
0
    }
615
187k
    case Type::AFV2: {
616
187k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
187k
      break;
618
0
    }
619
219k
    case Type::AFV3: {
620
219k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
219k
      break;
622
0
    }
623
20.6k
    case Type::DCT64X64: {
624
20.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
20.6k
                                 scratch_space);
626
20.6k
      break;
627
0
    }
628
4.96k
    case Type::DCT64X32: {
629
4.96k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
4.96k
                                 scratch_space);
631
4.96k
      break;
632
0
    }
633
7.86k
    case Type::DCT32X64: {
634
7.86k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
7.86k
                                 scratch_space);
636
7.86k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
5.35M
  }
669
5.35M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
121M
                                          float* JXL_RESTRICT scratch_space) {
462
121M
  using Type = AcStrategyType;
463
121M
  switch (strategy) {
464
9.62M
    case Type::IDENTITY: {
465
28.8M
      for (size_t y = 0; y < 2; y++) {
466
57.7M
        for (size_t x = 0; x < 2; x++) {
467
38.4M
          float block_dc = 0;
468
192M
          for (size_t iy = 0; iy < 4; iy++) {
469
769M
            for (size_t ix = 0; ix < 4; ix++) {
470
615M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
615M
            }
472
153M
          }
473
38.4M
          block_dc *= 1.0f / 16;
474
192M
          for (size_t iy = 0; iy < 4; iy++) {
475
769M
            for (size_t ix = 0; ix < 4; ix++) {
476
615M
              if (ix == 1 && iy == 1) continue;
477
577M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
577M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
577M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
577M
            }
481
153M
          }
482
38.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
38.4M
          coefficients[y * 8 + x] = block_dc;
484
38.4M
        }
485
19.2M
      }
486
9.62M
      float block00 = coefficients[0];
487
9.62M
      float block01 = coefficients[1];
488
9.62M
      float block10 = coefficients[8];
489
9.62M
      float block11 = coefficients[9];
490
9.62M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
9.62M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
9.62M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
9.62M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
9.62M
      break;
495
0
    }
496
9.62M
    case Type::DCT8X4: {
497
28.8M
      for (size_t x = 0; x < 2; x++) {
498
19.2M
        HWY_ALIGN float block[4 * 8];
499
19.2M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
19.2M
                                 scratch_space);
501
96.2M
        for (size_t iy = 0; iy < 4; iy++) {
502
692M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
615M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
615M
          }
506
76.9M
        }
507
19.2M
      }
508
9.62M
      float block0 = coefficients[0];
509
9.62M
      float block1 = coefficients[8];
510
9.62M
      coefficients[0] = (block0 + block1) * 0.5f;
511
9.62M
      coefficients[8] = (block0 - block1) * 0.5f;
512
9.62M
      break;
513
0
    }
514
9.62M
    case Type::DCT4X8: {
515
28.8M
      for (size_t y = 0; y < 2; y++) {
516
19.2M
        HWY_ALIGN float block[4 * 8];
517
19.2M
        ComputeScaledDCT<4, 8>()(
518
19.2M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
19.2M
            scratch_space);
520
96.2M
        for (size_t iy = 0; iy < 4; iy++) {
521
692M
          for (size_t ix = 0; ix < 8; ix++) {
522
615M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
615M
          }
524
76.9M
        }
525
19.2M
      }
526
9.62M
      float block0 = coefficients[0];
527
9.62M
      float block1 = coefficients[8];
528
9.62M
      coefficients[0] = (block0 + block1) * 0.5f;
529
9.62M
      coefficients[8] = (block0 - block1) * 0.5f;
530
9.62M
      break;
531
0
    }
532
9.62M
    case Type::DCT4X4: {
533
28.8M
      for (size_t y = 0; y < 2; y++) {
534
57.7M
        for (size_t x = 0; x < 2; x++) {
535
38.4M
          HWY_ALIGN float block[4 * 4];
536
38.4M
          ComputeScaledDCT<4, 4>()(
537
38.4M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
38.4M
              block, scratch_space);
539
192M
          for (size_t iy = 0; iy < 4; iy++) {
540
769M
            for (size_t ix = 0; ix < 4; ix++) {
541
615M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
615M
            }
543
153M
          }
544
38.4M
        }
545
19.2M
      }
546
9.62M
      float block00 = coefficients[0];
547
9.62M
      float block01 = coefficients[1];
548
9.62M
      float block10 = coefficients[8];
549
9.62M
      float block11 = coefficients[9];
550
9.62M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
9.62M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
9.62M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
9.62M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
9.62M
      break;
555
0
    }
556
9.62M
    case Type::DCT2X2: {
557
9.62M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
9.62M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
9.62M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
9.62M
      break;
561
0
    }
562
4.12M
    case Type::DCT16X16: {
563
4.12M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
4.12M
                                 scratch_space);
565
4.12M
      break;
566
0
    }
567
8.00M
    case Type::DCT16X8: {
568
8.00M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
8.00M
                                scratch_space);
570
8.00M
      break;
571
0
    }
572
8.02M
    case Type::DCT8X16: {
573
8.02M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
8.02M
                                scratch_space);
575
8.02M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
1.61M
    case Type::DCT32X16: {
588
1.61M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
1.61M
                                 scratch_space);
590
1.61M
      break;
591
0
    }
592
1.62M
    case Type::DCT16X32: {
593
1.62M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
1.62M
                                 scratch_space);
595
1.62M
      break;
596
0
    }
597
834k
    case Type::DCT32X32: {
598
834k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
834k
                                 scratch_space);
600
834k
      break;
601
0
    }
602
9.62M
    case Type::DCT: {
603
9.62M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
9.62M
                               scratch_space);
605
9.62M
      break;
606
0
    }
607
9.62M
    case Type::AFV0: {
608
9.62M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
9.62M
      break;
610
0
    }
611
9.62M
    case Type::AFV1: {
612
9.62M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
9.62M
      break;
614
0
    }
615
9.62M
    case Type::AFV2: {
616
9.62M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
9.62M
      break;
618
0
    }
619
9.62M
    case Type::AFV3: {
620
9.62M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
9.62M
      break;
622
0
    }
623
134k
    case Type::DCT64X64: {
624
134k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
134k
                                 scratch_space);
626
134k
      break;
627
0
    }
628
503k
    case Type::DCT64X32: {
629
503k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
503k
                                 scratch_space);
631
503k
      break;
632
0
    }
633
345k
    case Type::DCT32X64: {
634
345k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
345k
                                 scratch_space);
636
345k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
121M
  }
669
121M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
14.9M
                                          float* JXL_RESTRICT scratch_space) {
462
14.9M
  using Type = AcStrategyType;
463
14.9M
  switch (strategy) {
464
871k
    case Type::IDENTITY: {
465
2.61M
      for (size_t y = 0; y < 2; y++) {
466
5.22M
        for (size_t x = 0; x < 2; x++) {
467
3.48M
          float block_dc = 0;
468
17.4M
          for (size_t iy = 0; iy < 4; iy++) {
469
69.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
55.7M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
55.7M
            }
472
13.9M
          }
473
3.48M
          block_dc *= 1.0f / 16;
474
17.4M
          for (size_t iy = 0; iy < 4; iy++) {
475
69.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
55.7M
              if (ix == 1 && iy == 1) continue;
477
52.2M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
52.2M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
52.2M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
52.2M
            }
481
13.9M
          }
482
3.48M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
3.48M
          coefficients[y * 8 + x] = block_dc;
484
3.48M
        }
485
1.74M
      }
486
871k
      float block00 = coefficients[0];
487
871k
      float block01 = coefficients[1];
488
871k
      float block10 = coefficients[8];
489
871k
      float block11 = coefficients[9];
490
871k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
871k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
871k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
871k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
871k
      break;
495
0
    }
496
281k
    case Type::DCT8X4: {
497
845k
      for (size_t x = 0; x < 2; x++) {
498
563k
        HWY_ALIGN float block[4 * 8];
499
563k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
563k
                                 scratch_space);
501
2.81M
        for (size_t iy = 0; iy < 4; iy++) {
502
20.2M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
18.0M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
18.0M
          }
506
2.25M
        }
507
563k
      }
508
281k
      float block0 = coefficients[0];
509
281k
      float block1 = coefficients[8];
510
281k
      coefficients[0] = (block0 + block1) * 0.5f;
511
281k
      coefficients[8] = (block0 - block1) * 0.5f;
512
281k
      break;
513
0
    }
514
129k
    case Type::DCT4X8: {
515
388k
      for (size_t y = 0; y < 2; y++) {
516
258k
        HWY_ALIGN float block[4 * 8];
517
258k
        ComputeScaledDCT<4, 8>()(
518
258k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
258k
            scratch_space);
520
1.29M
        for (size_t iy = 0; iy < 4; iy++) {
521
9.32M
          for (size_t ix = 0; ix < 8; ix++) {
522
8.28M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
8.28M
          }
524
1.03M
        }
525
258k
      }
526
129k
      float block0 = coefficients[0];
527
129k
      float block1 = coefficients[8];
528
129k
      coefficients[0] = (block0 + block1) * 0.5f;
529
129k
      coefficients[8] = (block0 - block1) * 0.5f;
530
129k
      break;
531
0
    }
532
1.70k
    case Type::DCT4X4: {
533
5.12k
      for (size_t y = 0; y < 2; y++) {
534
10.2k
        for (size_t x = 0; x < 2; x++) {
535
6.82k
          HWY_ALIGN float block[4 * 4];
536
6.82k
          ComputeScaledDCT<4, 4>()(
537
6.82k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
6.82k
              block, scratch_space);
539
34.1k
          for (size_t iy = 0; iy < 4; iy++) {
540
136k
            for (size_t ix = 0; ix < 4; ix++) {
541
109k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
109k
            }
543
27.3k
          }
544
6.82k
        }
545
3.41k
      }
546
1.70k
      float block00 = coefficients[0];
547
1.70k
      float block01 = coefficients[1];
548
1.70k
      float block10 = coefficients[8];
549
1.70k
      float block11 = coefficients[9];
550
1.70k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
1.70k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
1.70k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
1.70k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
1.70k
      break;
555
0
    }
556
899k
    case Type::DCT2X2: {
557
899k
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
899k
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
899k
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
899k
      break;
561
0
    }
562
160k
    case Type::DCT16X16: {
563
160k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
160k
                                 scratch_space);
565
160k
      break;
566
0
    }
567
258k
    case Type::DCT16X8: {
568
258k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
258k
                                scratch_space);
570
258k
      break;
571
0
    }
572
303k
    case Type::DCT8X16: {
573
303k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
303k
                                scratch_space);
575
303k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
44.9k
    case Type::DCT32X16: {
588
44.9k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
44.9k
                                 scratch_space);
590
44.9k
      break;
591
0
    }
592
74.9k
    case Type::DCT16X32: {
593
74.9k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
74.9k
                                 scratch_space);
595
74.9k
      break;
596
0
    }
597
45.5k
    case Type::DCT32X32: {
598
45.5k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
45.5k
                                 scratch_space);
600
45.5k
      break;
601
0
    }
602
11.1M
    case Type::DCT: {
603
11.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
11.1M
                               scratch_space);
605
11.1M
      break;
606
0
    }
607
196k
    case Type::AFV0: {
608
196k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
196k
      break;
610
0
    }
611
160k
    case Type::AFV1: {
612
160k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
160k
      break;
614
0
    }
615
187k
    case Type::AFV2: {
616
187k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
187k
      break;
618
0
    }
619
219k
    case Type::AFV3: {
620
219k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
219k
      break;
622
0
    }
623
20.6k
    case Type::DCT64X64: {
624
20.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
20.6k
                                 scratch_space);
626
20.6k
      break;
627
0
    }
628
4.96k
    case Type::DCT64X32: {
629
4.96k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
4.96k
                                 scratch_space);
631
4.96k
      break;
632
0
    }
633
7.86k
    case Type::DCT32X64: {
634
7.86k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
7.86k
                                 scratch_space);
636
7.86k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
14.9M
  }
669
14.9M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
20.3M
                                              float* scratch_space) {
676
20.3M
  using Type = AcStrategyType;
677
20.3M
  switch (strategy) {
678
516k
    case Type::DCT16X8: {
679
516k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
516k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
516k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
516k
      break;
683
0
    }
684
607k
    case Type::DCT8X16: {
685
607k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
607k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
607k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
607k
      break;
689
0
    }
690
321k
    case Type::DCT16X16: {
691
321k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
321k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
321k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
321k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
89.8k
    case Type::DCT32X16: {
709
89.8k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
89.8k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
89.8k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
89.8k
      break;
713
0
    }
714
149k
    case Type::DCT16X32: {
715
149k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
149k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
149k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
149k
      break;
719
0
    }
720
91.0k
    case Type::DCT32X32: {
721
91.0k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
91.0k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
91.0k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
91.0k
      break;
725
0
    }
726
9.93k
    case Type::DCT64X32: {
727
9.93k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
9.93k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
9.93k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
9.93k
      break;
731
0
    }
732
15.7k
    case Type::DCT32X64: {
733
15.7k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
15.7k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
15.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
15.7k
      break;
737
0
    }
738
41.2k
    case Type::DCT64X64: {
739
41.2k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
41.2k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
41.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
41.2k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
12.5M
    case Type::DCT:
787
14.3M
    case Type::DCT2X2:
788
14.4M
    case Type::DCT4X4:
789
14.6M
    case Type::DCT4X8:
790
15.2M
    case Type::DCT8X4:
791
15.6M
    case Type::AFV0:
792
15.9M
    case Type::AFV1:
793
16.3M
    case Type::AFV2:
794
16.7M
    case Type::AFV3:
795
18.4M
    case Type::IDENTITY:
796
18.4M
      dc[0] = block[0];
797
18.4M
      break;
798
20.3M
  }
799
20.3M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
5.35M
                                              float* scratch_space) {
676
5.35M
  using Type = AcStrategyType;
677
5.35M
  switch (strategy) {
678
258k
    case Type::DCT16X8: {
679
258k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
258k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
258k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
258k
      break;
683
0
    }
684
303k
    case Type::DCT8X16: {
685
303k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
303k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
303k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
303k
      break;
689
0
    }
690
160k
    case Type::DCT16X16: {
691
160k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
160k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
160k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
160k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
44.9k
    case Type::DCT32X16: {
709
44.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
44.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
44.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
44.9k
      break;
713
0
    }
714
74.9k
    case Type::DCT16X32: {
715
74.9k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
74.9k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
74.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
74.9k
      break;
719
0
    }
720
45.5k
    case Type::DCT32X32: {
721
45.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
45.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
45.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
45.5k
      break;
725
0
    }
726
4.96k
    case Type::DCT64X32: {
727
4.96k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
4.96k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
4.96k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
4.96k
      break;
731
0
    }
732
7.86k
    case Type::DCT32X64: {
733
7.86k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
7.86k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
7.86k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
7.86k
      break;
737
0
    }
738
20.6k
    case Type::DCT64X64: {
739
20.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
20.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
20.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
20.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
1.48M
    case Type::DCT:
787
2.38M
    case Type::DCT2X2:
788
2.39M
    case Type::DCT4X4:
789
2.52M
    case Type::DCT4X8:
790
2.80M
    case Type::DCT8X4:
791
2.99M
    case Type::AFV0:
792
3.15M
    case Type::AFV1:
793
3.34M
    case Type::AFV2:
794
3.56M
    case Type::AFV3:
795
4.43M
    case Type::IDENTITY:
796
4.43M
      dc[0] = block[0];
797
4.43M
      break;
798
5.35M
  }
799
5.35M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
14.9M
                                              float* scratch_space) {
676
14.9M
  using Type = AcStrategyType;
677
14.9M
  switch (strategy) {
678
258k
    case Type::DCT16X8: {
679
258k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
258k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
258k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
258k
      break;
683
0
    }
684
303k
    case Type::DCT8X16: {
685
303k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
303k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
303k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
303k
      break;
689
0
    }
690
160k
    case Type::DCT16X16: {
691
160k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
160k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
160k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
160k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
44.9k
    case Type::DCT32X16: {
709
44.9k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
44.9k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
44.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
44.9k
      break;
713
0
    }
714
74.9k
    case Type::DCT16X32: {
715
74.9k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
74.9k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
74.9k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
74.9k
      break;
719
0
    }
720
45.5k
    case Type::DCT32X32: {
721
45.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
45.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
45.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
45.5k
      break;
725
0
    }
726
4.96k
    case Type::DCT64X32: {
727
4.96k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
4.96k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
4.96k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
4.96k
      break;
731
0
    }
732
7.86k
    case Type::DCT32X64: {
733
7.86k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
7.86k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
7.86k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
7.86k
      break;
737
0
    }
738
20.6k
    case Type::DCT64X64: {
739
20.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
20.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
20.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
20.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
11.1M
    case Type::DCT:
787
12.0M
    case Type::DCT2X2:
788
12.0M
    case Type::DCT4X4:
789
12.1M
    case Type::DCT4X8:
790
12.4M
    case Type::DCT8X4:
791
12.6M
    case Type::AFV0:
792
12.7M
    case Type::AFV1:
793
12.9M
    case Type::AFV2:
794
13.1M
    case Type::AFV3:
795
14.0M
    case Type::IDENTITY:
796
14.0M
      dc[0] = block[0];
797
14.0M
      break;
798
14.9M
  }
799
14.9M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_