Coverage Report

Created: 2025-12-03 07:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.51M
                                   const size_t output_stride, float* scratch) {
40
2.51M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.51M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.51M
  float* block = scratch;
43
2.51M
  if (ROWS < COLS) {
44
2.40M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
5.49M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.14M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.14M
      }
50
1.35M
    }
51
1.46M
  } else {
52
4.80M
    for (size_t y = 0; y < LF_COLS; y++) {
53
16.2M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
12.9M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
12.9M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
12.9M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
12.9M
      }
58
3.33M
    }
59
1.46M
  }
60
61
2.51M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.51M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.51M
                                  scratch_space);
64
2.51M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
293k
                                   const size_t output_stride, float* scratch) {
40
293k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
293k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
293k
  float* block = scratch;
43
293k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
293k
  } else {
52
587k
    for (size_t y = 0; y < LF_COLS; y++) {
53
881k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
587k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
587k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
587k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
587k
      }
58
293k
    }
59
293k
  }
60
61
293k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
293k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
293k
                                  scratch_space);
64
293k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
394k
                                   const size_t output_stride, float* scratch) {
40
394k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
394k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
394k
  float* block = scratch;
43
394k
  if (ROWS < COLS) {
44
788k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.18M
      for (size_t x = 0; x < LF_COLS; x++) {
46
788k
        block[y * COLS + x] = input[y * input_stride + x] *
47
788k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
788k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
788k
      }
50
394k
    }
51
394k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
394k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
394k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
394k
                                  scratch_space);
64
394k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
178k
                                   const size_t output_stride, float* scratch) {
40
178k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
178k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
178k
  float* block = scratch;
43
178k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
178k
  } else {
52
534k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.06M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
712k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
712k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
712k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
712k
      }
58
356k
    }
59
178k
  }
60
61
178k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
178k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
178k
                                  scratch_space);
64
178k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
77.5k
                                   const size_t output_stride, float* scratch) {
40
77.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
77.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
77.5k
  float* block = scratch;
43
77.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
77.5k
  } else {
52
232k
    for (size_t y = 0; y < LF_COLS; y++) {
53
775k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
620k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
620k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
620k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
620k
      }
58
155k
    }
59
77.5k
  }
60
61
77.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
77.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
77.5k
                                  scratch_space);
64
77.5k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
121k
                                   const size_t output_stride, float* scratch) {
40
121k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
121k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
121k
  float* block = scratch;
43
121k
  if (ROWS < COLS) {
44
363k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.21M
      for (size_t x = 0; x < LF_COLS; x++) {
46
970k
        block[y * COLS + x] = input[y * input_stride + x] *
47
970k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
970k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
970k
      }
50
242k
    }
51
121k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
121k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
121k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
121k
                                  scratch_space);
64
121k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
148k
                                   const size_t output_stride, float* scratch) {
40
148k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
148k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
148k
  float* block = scratch;
43
148k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
148k
  } else {
52
741k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.37M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.37M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.37M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.37M
      }
58
593k
    }
59
148k
  }
60
61
148k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
148k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
148k
                                  scratch_space);
64
148k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.78k
                                   const size_t output_stride, float* scratch) {
40
5.78k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.78k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.78k
  float* block = scratch;
43
5.78k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
5.78k
  } else {
52
28.9k
    for (size_t y = 0; y < LF_COLS; y++) {
53
208k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
184k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
184k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
184k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
184k
      }
58
23.1k
    }
59
5.78k
  }
60
61
5.78k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.78k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.78k
                                  scratch_space);
64
5.78k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
9.74k
                                   const size_t output_stride, float* scratch) {
40
9.74k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
9.74k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
9.74k
  float* block = scratch;
43
9.74k
  if (ROWS < COLS) {
44
48.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
350k
      for (size_t x = 0; x < LF_COLS; x++) {
46
311k
        block[y * COLS + x] = input[y * input_stride + x] *
47
311k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
311k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
311k
      }
50
38.9k
    }
51
9.74k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
9.74k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
9.74k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
9.74k
                                  scratch_space);
64
9.74k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
30.7k
                                   const size_t output_stride, float* scratch) {
40
30.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
30.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
30.7k
  float* block = scratch;
43
30.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
30.7k
  } else {
52
277k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.21M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.97M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.97M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.97M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.97M
      }
58
246k
    }
59
30.7k
  }
60
61
30.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
30.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
30.7k
                                  scratch_space);
64
30.7k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
293k
                                   const size_t output_stride, float* scratch) {
40
293k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
293k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
293k
  float* block = scratch;
43
293k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
293k
  } else {
52
587k
    for (size_t y = 0; y < LF_COLS; y++) {
53
881k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
587k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
587k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
587k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
587k
      }
58
293k
    }
59
293k
  }
60
61
293k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
293k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
293k
                                  scratch_space);
64
293k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
394k
                                   const size_t output_stride, float* scratch) {
40
394k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
394k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
394k
  float* block = scratch;
43
394k
  if (ROWS < COLS) {
44
788k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.18M
      for (size_t x = 0; x < LF_COLS; x++) {
46
788k
        block[y * COLS + x] = input[y * input_stride + x] *
47
788k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
788k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
788k
      }
50
394k
    }
51
394k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
394k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
394k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
394k
                                  scratch_space);
64
394k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
178k
                                   const size_t output_stride, float* scratch) {
40
178k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
178k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
178k
  float* block = scratch;
43
178k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
178k
  } else {
52
534k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.06M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
712k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
712k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
712k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
712k
      }
58
356k
    }
59
178k
  }
60
61
178k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
178k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
178k
                                  scratch_space);
64
178k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
77.5k
                                   const size_t output_stride, float* scratch) {
40
77.5k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
77.5k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
77.5k
  float* block = scratch;
43
77.5k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
77.5k
  } else {
52
232k
    for (size_t y = 0; y < LF_COLS; y++) {
53
775k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
620k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
620k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
620k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
620k
      }
58
155k
    }
59
77.5k
  }
60
61
77.5k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
77.5k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
77.5k
                                  scratch_space);
64
77.5k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
121k
                                   const size_t output_stride, float* scratch) {
40
121k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
121k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
121k
  float* block = scratch;
43
121k
  if (ROWS < COLS) {
44
363k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.21M
      for (size_t x = 0; x < LF_COLS; x++) {
46
970k
        block[y * COLS + x] = input[y * input_stride + x] *
47
970k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
970k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
970k
      }
50
242k
    }
51
121k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
121k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
121k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
121k
                                  scratch_space);
64
121k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
148k
                                   const size_t output_stride, float* scratch) {
40
148k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
148k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
148k
  float* block = scratch;
43
148k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
148k
  } else {
52
741k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
2.37M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
2.37M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
2.37M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
2.37M
      }
58
593k
    }
59
148k
  }
60
61
148k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
148k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
148k
                                  scratch_space);
64
148k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
5.78k
                                   const size_t output_stride, float* scratch) {
40
5.78k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
5.78k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
5.78k
  float* block = scratch;
43
5.78k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
5.78k
  } else {
52
28.9k
    for (size_t y = 0; y < LF_COLS; y++) {
53
208k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
184k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
184k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
184k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
184k
      }
58
23.1k
    }
59
5.78k
  }
60
61
5.78k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
5.78k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
5.78k
                                  scratch_space);
64
5.78k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
9.74k
                                   const size_t output_stride, float* scratch) {
40
9.74k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
9.74k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
9.74k
  float* block = scratch;
43
9.74k
  if (ROWS < COLS) {
44
48.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
350k
      for (size_t x = 0; x < LF_COLS; x++) {
46
311k
        block[y * COLS + x] = input[y * input_stride + x] *
47
311k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
311k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
311k
      }
50
38.9k
    }
51
9.74k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
9.74k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
9.74k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
9.74k
                                  scratch_space);
64
9.74k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
30.7k
                                   const size_t output_stride, float* scratch) {
40
30.7k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
30.7k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
30.7k
  float* block = scratch;
43
30.7k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
30.7k
  } else {
52
277k
    for (size_t y = 0; y < LF_COLS; y++) {
53
2.21M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
1.97M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
1.97M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
1.97M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
1.97M
      }
58
246k
    }
59
30.7k
  }
60
61
30.7k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
30.7k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
30.7k
                                  scratch_space);
64
30.7k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
51.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
51.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
51.6M
  static_assert(S % 2 == 0, "S should be even");
70
51.6M
  float temp[kDCTBlockSize];
71
51.6M
  constexpr size_t num_2x2 = S / 2;
72
172M
  for (size_t y = 0; y < num_2x2; y++) {
73
482M
    for (size_t x = 0; x < num_2x2; x++) {
74
361M
      float c00 = block[y * 2 * stride + x * 2];
75
361M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
361M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
361M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
361M
      float r00 = c00 + c01 + c10 + c11;
79
361M
      float r01 = c00 + c01 - c10 - c11;
80
361M
      float r10 = c00 - c01 + c10 - c11;
81
361M
      float r11 = c00 - c01 - c10 + c11;
82
361M
      r00 *= 0.25f;
83
361M
      r01 *= 0.25f;
84
361M
      r10 *= 0.25f;
85
361M
      r11 *= 0.25f;
86
361M
      temp[y * kBlockDim + x] = r00;
87
361M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
361M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
361M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
361M
    }
91
120M
  }
92
292M
  for (size_t y = 0; y < S; y++) {
93
1.68G
    for (size_t x = 0; x < S; x++) {
94
1.44G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.44G
    }
96
241M
  }
97
51.6M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
5.16M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.5M
      float c00 = block[y * 2 * stride + x * 2];
75
16.5M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.5M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.5M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.5M
      float r00 = c00 + c01 + c10 + c11;
79
16.5M
      float r01 = c00 + c01 - c10 - c11;
80
16.5M
      float r10 = c00 - c01 + c10 - c11;
81
16.5M
      float r11 = c00 - c01 - c10 + c11;
82
16.5M
      r00 *= 0.25f;
83
16.5M
      r01 *= 0.25f;
84
16.5M
      r10 *= 0.25f;
85
16.5M
      r11 *= 0.25f;
86
16.5M
      temp[y * kBlockDim + x] = r00;
87
16.5M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.5M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.5M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.5M
    }
91
4.12M
  }
92
9.29M
  for (size_t y = 0; y < S; y++) {
93
74.3M
    for (size_t x = 0; x < S; x++) {
94
66.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
66.0M
    }
96
8.25M
  }
97
1.03M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
3.09M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.19M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.12M
      float c00 = block[y * 2 * stride + x * 2];
75
4.12M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.12M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.12M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.12M
      float r00 = c00 + c01 + c10 + c11;
79
4.12M
      float r01 = c00 + c01 - c10 - c11;
80
4.12M
      float r10 = c00 - c01 + c10 - c11;
81
4.12M
      float r11 = c00 - c01 - c10 + c11;
82
4.12M
      r00 *= 0.25f;
83
4.12M
      r01 *= 0.25f;
84
4.12M
      r10 *= 0.25f;
85
4.12M
      r11 *= 0.25f;
86
4.12M
      temp[y * kBlockDim + x] = r00;
87
4.12M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.12M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.12M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.12M
    }
91
2.06M
  }
92
5.16M
  for (size_t y = 0; y < S; y++) {
93
20.6M
    for (size_t x = 0; x < S; x++) {
94
16.5M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.5M
    }
96
4.12M
  }
97
1.03M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
2.06M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.06M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.03M
      float c00 = block[y * 2 * stride + x * 2];
75
1.03M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.03M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.03M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.03M
      float r00 = c00 + c01 + c10 + c11;
79
1.03M
      float r01 = c00 + c01 - c10 - c11;
80
1.03M
      float r10 = c00 - c01 + c10 - c11;
81
1.03M
      float r11 = c00 - c01 - c10 + c11;
82
1.03M
      r00 *= 0.25f;
83
1.03M
      r01 *= 0.25f;
84
1.03M
      r10 *= 0.25f;
85
1.03M
      r11 *= 0.25f;
86
1.03M
      temp[y * kBlockDim + x] = r00;
87
1.03M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.03M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.03M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.03M
    }
91
1.03M
  }
92
3.09M
  for (size_t y = 0; y < S; y++) {
93
6.19M
    for (size_t x = 0; x < S; x++) {
94
4.12M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.12M
    }
96
2.06M
  }
97
1.03M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
75.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
303M
    for (size_t x = 0; x < num_2x2; x++) {
74
242M
      float c00 = block[y * 2 * stride + x * 2];
75
242M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
242M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
242M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
242M
      float r00 = c00 + c01 + c10 + c11;
79
242M
      float r01 = c00 + c01 - c10 - c11;
80
242M
      float r10 = c00 - c01 + c10 - c11;
81
242M
      float r11 = c00 - c01 - c10 + c11;
82
242M
      r00 *= 0.25f;
83
242M
      r01 *= 0.25f;
84
242M
      r10 *= 0.25f;
85
242M
      r11 *= 0.25f;
86
242M
      temp[y * kBlockDim + x] = r00;
87
242M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
242M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
242M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
242M
    }
91
60.6M
  }
92
136M
  for (size_t y = 0; y < S; y++) {
93
1.09G
    for (size_t x = 0; x < S; x++) {
94
970M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
970M
    }
96
121M
  }
97
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
45.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
90.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
60.6M
      float c00 = block[y * 2 * stride + x * 2];
75
60.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
60.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
60.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
60.6M
      float r00 = c00 + c01 + c10 + c11;
79
60.6M
      float r01 = c00 + c01 - c10 - c11;
80
60.6M
      float r10 = c00 - c01 + c10 - c11;
81
60.6M
      float r11 = c00 - c01 - c10 + c11;
82
60.6M
      r00 *= 0.25f;
83
60.6M
      r01 *= 0.25f;
84
60.6M
      r10 *= 0.25f;
85
60.6M
      r11 *= 0.25f;
86
60.6M
      temp[y * kBlockDim + x] = r00;
87
60.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
60.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
60.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
60.6M
    }
91
30.3M
  }
92
75.8M
  for (size_t y = 0; y < S; y++) {
93
303M
    for (size_t x = 0; x < S; x++) {
94
242M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
242M
    }
96
60.6M
  }
97
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
15.1M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
15.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
15.1M
  static_assert(S % 2 == 0, "S should be even");
70
15.1M
  float temp[kDCTBlockSize];
71
15.1M
  constexpr size_t num_2x2 = S / 2;
72
30.3M
  for (size_t y = 0; y < num_2x2; y++) {
73
30.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
15.1M
      float c00 = block[y * 2 * stride + x * 2];
75
15.1M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
15.1M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
15.1M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
15.1M
      float r00 = c00 + c01 + c10 + c11;
79
15.1M
      float r01 = c00 + c01 - c10 - c11;
80
15.1M
      float r10 = c00 - c01 + c10 - c11;
81
15.1M
      float r11 = c00 - c01 - c10 + c11;
82
15.1M
      r00 *= 0.25f;
83
15.1M
      r01 *= 0.25f;
84
15.1M
      r10 *= 0.25f;
85
15.1M
      r11 *= 0.25f;
86
15.1M
      temp[y * kBlockDim + x] = r00;
87
15.1M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
15.1M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
15.1M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
15.1M
    }
91
15.1M
  }
92
45.4M
  for (size_t y = 0; y < S; y++) {
93
90.9M
    for (size_t x = 0; x < S; x++) {
94
60.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
60.6M
    }
96
30.3M
  }
97
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
5.16M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.6M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.5M
      float c00 = block[y * 2 * stride + x * 2];
75
16.5M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.5M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.5M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.5M
      float r00 = c00 + c01 + c10 + c11;
79
16.5M
      float r01 = c00 + c01 - c10 - c11;
80
16.5M
      float r10 = c00 - c01 + c10 - c11;
81
16.5M
      float r11 = c00 - c01 - c10 + c11;
82
16.5M
      r00 *= 0.25f;
83
16.5M
      r01 *= 0.25f;
84
16.5M
      r10 *= 0.25f;
85
16.5M
      r11 *= 0.25f;
86
16.5M
      temp[y * kBlockDim + x] = r00;
87
16.5M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.5M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.5M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.5M
    }
91
4.12M
  }
92
9.29M
  for (size_t y = 0; y < S; y++) {
93
74.3M
    for (size_t x = 0; x < S; x++) {
94
66.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
66.0M
    }
96
8.25M
  }
97
1.03M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
3.09M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.19M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.12M
      float c00 = block[y * 2 * stride + x * 2];
75
4.12M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.12M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.12M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.12M
      float r00 = c00 + c01 + c10 + c11;
79
4.12M
      float r01 = c00 + c01 - c10 - c11;
80
4.12M
      float r10 = c00 - c01 + c10 - c11;
81
4.12M
      float r11 = c00 - c01 - c10 + c11;
82
4.12M
      r00 *= 0.25f;
83
4.12M
      r01 *= 0.25f;
84
4.12M
      r10 *= 0.25f;
85
4.12M
      r11 *= 0.25f;
86
4.12M
      temp[y * kBlockDim + x] = r00;
87
4.12M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.12M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.12M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.12M
    }
91
2.06M
  }
92
5.16M
  for (size_t y = 0; y < S; y++) {
93
20.6M
    for (size_t x = 0; x < S; x++) {
94
16.5M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.5M
    }
96
4.12M
  }
97
1.03M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.03M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.03M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.03M
  static_assert(S % 2 == 0, "S should be even");
70
1.03M
  float temp[kDCTBlockSize];
71
1.03M
  constexpr size_t num_2x2 = S / 2;
72
2.06M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.06M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.03M
      float c00 = block[y * 2 * stride + x * 2];
75
1.03M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.03M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.03M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.03M
      float r00 = c00 + c01 + c10 + c11;
79
1.03M
      float r01 = c00 + c01 - c10 - c11;
80
1.03M
      float r10 = c00 - c01 + c10 - c11;
81
1.03M
      float r11 = c00 - c01 - c10 + c11;
82
1.03M
      r00 *= 0.25f;
83
1.03M
      r01 *= 0.25f;
84
1.03M
      r10 *= 0.25f;
85
1.03M
      r11 *= 0.25f;
86
1.03M
      temp[y * kBlockDim + x] = r00;
87
1.03M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.03M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.03M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.03M
    }
91
1.03M
  }
92
3.09M
  for (size_t y = 0; y < S; y++) {
93
6.19M
    for (size_t x = 0; x < S; x++) {
94
4.12M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.12M
    }
96
2.06M
  }
97
1.03M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
62.8M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
62.8M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
62.8M
      {
102
62.8M
          0.2500000000000000,
103
62.8M
          0.8769029297991420f,
104
62.8M
          0.0000000000000000,
105
62.8M
          0.0000000000000000,
106
62.8M
          0.0000000000000000,
107
62.8M
          -0.4105377591765233f,
108
62.8M
          0.0000000000000000,
109
62.8M
          0.0000000000000000,
110
62.8M
          0.0000000000000000,
111
62.8M
          0.0000000000000000,
112
62.8M
          0.0000000000000000,
113
62.8M
          0.0000000000000000,
114
62.8M
          0.0000000000000000,
115
62.8M
          0.0000000000000000,
116
62.8M
          0.0000000000000000,
117
62.8M
          0.0000000000000000,
118
62.8M
      },
119
62.8M
      {
120
62.8M
          0.2500000000000000,
121
62.8M
          0.2206518106944235f,
122
62.8M
          0.0000000000000000,
123
62.8M
          0.0000000000000000,
124
62.8M
          -0.7071067811865474f,
125
62.8M
          0.6235485373547691f,
126
62.8M
          0.0000000000000000,
127
62.8M
          0.0000000000000000,
128
62.8M
          0.0000000000000000,
129
62.8M
          0.0000000000000000,
130
62.8M
          0.0000000000000000,
131
62.8M
          0.0000000000000000,
132
62.8M
          0.0000000000000000,
133
62.8M
          0.0000000000000000,
134
62.8M
          0.0000000000000000,
135
62.8M
          0.0000000000000000,
136
62.8M
      },
137
62.8M
      {
138
62.8M
          0.2500000000000000,
139
62.8M
          -0.1014005039375376f,
140
62.8M
          0.4067007583026075f,
141
62.8M
          -0.2125574805828875f,
142
62.8M
          0.0000000000000000,
143
62.8M
          -0.0643507165794627f,
144
62.8M
          -0.4517556589999482f,
145
62.8M
          -0.3046847507248690f,
146
62.8M
          0.3017929516615495f,
147
62.8M
          0.4082482904638627f,
148
62.8M
          0.1747866975480809f,
149
62.8M
          -0.2110560104933578f,
150
62.8M
          -0.1426608480880726f,
151
62.8M
          -0.1381354035075859f,
152
62.8M
          -0.1743760259965107f,
153
62.8M
          0.1135498731499434f,
154
62.8M
      },
155
62.8M
      {
156
62.8M
          0.2500000000000000,
157
62.8M
          -0.1014005039375375f,
158
62.8M
          0.4444481661973445f,
159
62.8M
          0.3085497062849767f,
160
62.8M
          0.0000000000000000f,
161
62.8M
          -0.0643507165794627f,
162
62.8M
          0.1585450355184006f,
163
62.8M
          0.5112616136591823f,
164
62.8M
          0.2579236279634118f,
165
62.8M
          0.0000000000000000,
166
62.8M
          0.0812611176717539f,
167
62.8M
          0.1856718091610980f,
168
62.8M
          -0.3416446842253372f,
169
62.8M
          0.3302282550303788f,
170
62.8M
          0.0702790691196284f,
171
62.8M
          -0.0741750459581035f,
172
62.8M
      },
173
62.8M
      {
174
62.8M
          0.2500000000000000,
175
62.8M
          0.2206518106944236f,
176
62.8M
          0.0000000000000000,
177
62.8M
          0.0000000000000000,
178
62.8M
          0.7071067811865476f,
179
62.8M
          0.6235485373547694f,
180
62.8M
          0.0000000000000000,
181
62.8M
          0.0000000000000000,
182
62.8M
          0.0000000000000000,
183
62.8M
          0.0000000000000000,
184
62.8M
          0.0000000000000000,
185
62.8M
          0.0000000000000000,
186
62.8M
          0.0000000000000000,
187
62.8M
          0.0000000000000000,
188
62.8M
          0.0000000000000000,
189
62.8M
          0.0000000000000000,
190
62.8M
      },
191
62.8M
      {
192
62.8M
          0.2500000000000000,
193
62.8M
          -0.1014005039375378f,
194
62.8M
          0.0000000000000000,
195
62.8M
          0.4706702258572536f,
196
62.8M
          0.0000000000000000,
197
62.8M
          -0.0643507165794628f,
198
62.8M
          -0.0403851516082220f,
199
62.8M
          0.0000000000000000,
200
62.8M
          0.1627234014286620f,
201
62.8M
          0.0000000000000000,
202
62.8M
          0.0000000000000000,
203
62.8M
          0.0000000000000000,
204
62.8M
          0.7367497537172237f,
205
62.8M
          0.0875511500058708f,
206
62.8M
          -0.2921026642334881f,
207
62.8M
          0.1940289303259434f,
208
62.8M
      },
209
62.8M
      {
210
62.8M
          0.2500000000000000,
211
62.8M
          -0.1014005039375377f,
212
62.8M
          0.1957439937204294f,
213
62.8M
          -0.1621205195722993f,
214
62.8M
          0.0000000000000000,
215
62.8M
          -0.0643507165794628f,
216
62.8M
          0.0074182263792424f,
217
62.8M
          -0.2904801297289980f,
218
62.8M
          0.0952002265347504f,
219
62.8M
          0.0000000000000000,
220
62.8M
          -0.3675398009862027f,
221
62.8M
          0.4921585901373873f,
222
62.8M
          0.2462710772207515f,
223
62.8M
          -0.0794670660590957f,
224
62.8M
          0.3623817333531167f,
225
62.8M
          -0.4351904965232280f,
226
62.8M
      },
227
62.8M
      {
228
62.8M
          0.2500000000000000,
229
62.8M
          -0.1014005039375376f,
230
62.8M
          0.2929100136981264f,
231
62.8M
          0.0000000000000000,
232
62.8M
          0.0000000000000000,
233
62.8M
          -0.0643507165794627f,
234
62.8M
          0.3935103426921017f,
235
62.8M
          -0.0657870154914280f,
236
62.8M
          0.0000000000000000,
237
62.8M
          -0.4082482904638628f,
238
62.8M
          -0.3078822139579090f,
239
62.8M
          -0.3852501370925192f,
240
62.8M
          -0.0857401903551931f,
241
62.8M
          -0.4613374887461511f,
242
62.8M
          0.0000000000000000,
243
62.8M
          0.2191868483885747f,
244
62.8M
      },
245
62.8M
      {
246
62.8M
          0.2500000000000000,
247
62.8M
          -0.1014005039375376f,
248
62.8M
          -0.4067007583026072f,
249
62.8M
          -0.2125574805828705f,
250
62.8M
          0.0000000000000000,
251
62.8M
          -0.0643507165794627f,
252
62.8M
          -0.4517556589999464f,
253
62.8M
          0.3046847507248840f,
254
62.8M
          0.3017929516615503f,
255
62.8M
          -0.4082482904638635f,
256
62.8M
          -0.1747866975480813f,
257
62.8M
          0.2110560104933581f,
258
62.8M
          -0.1426608480880734f,
259
62.8M
          -0.1381354035075829f,
260
62.8M
          -0.1743760259965108f,
261
62.8M
          0.1135498731499426f,
262
62.8M
      },
263
62.8M
      {
264
62.8M
          0.2500000000000000,
265
62.8M
          -0.1014005039375377f,
266
62.8M
          -0.1957439937204287f,
267
62.8M
          -0.1621205195722833f,
268
62.8M
          0.0000000000000000,
269
62.8M
          -0.0643507165794628f,
270
62.8M
          0.0074182263792444f,
271
62.8M
          0.2904801297290076f,
272
62.8M
          0.0952002265347505f,
273
62.8M
          0.0000000000000000,
274
62.8M
          0.3675398009862011f,
275
62.8M
          -0.4921585901373891f,
276
62.8M
          0.2462710772207514f,
277
62.8M
          -0.0794670660591026f,
278
62.8M
          0.3623817333531165f,
279
62.8M
          -0.4351904965232251f,
280
62.8M
      },
281
62.8M
      {
282
62.8M
          0.2500000000000000,
283
62.8M
          -0.1014005039375375f,
284
62.8M
          0.0000000000000000,
285
62.8M
          -0.4706702258572528f,
286
62.8M
          0.0000000000000000,
287
62.8M
          -0.0643507165794627f,
288
62.8M
          0.1107416575309343f,
289
62.8M
          0.0000000000000000,
290
62.8M
          -0.1627234014286617f,
291
62.8M
          0.0000000000000000,
292
62.8M
          0.0000000000000000,
293
62.8M
          0.0000000000000000,
294
62.8M
          0.1488339922711357f,
295
62.8M
          0.4972464710953509f,
296
62.8M
          0.2921026642334879f,
297
62.8M
          0.5550443808910661f,
298
62.8M
      },
299
62.8M
      {
300
62.8M
          0.2500000000000000,
301
62.8M
          -0.1014005039375377f,
302
62.8M
          0.1137907446044809f,
303
62.8M
          -0.1464291867126764f,
304
62.8M
          0.0000000000000000,
305
62.8M
          -0.0643507165794628f,
306
62.8M
          0.0829816309488205f,
307
62.8M
          -0.2388977352334460f,
308
62.8M
          -0.3531238544981630f,
309
62.8M
          -0.4082482904638630f,
310
62.8M
          0.4826689115059883f,
311
62.8M
          0.1741941265991622f,
312
62.8M
          -0.0476868035022925f,
313
62.8M
          0.1253805944856366f,
314
62.8M
          -0.4326608024727445f,
315
62.8M
          -0.2546827712406646f,
316
62.8M
      },
317
62.8M
      {
318
62.8M
          0.2500000000000000,
319
62.8M
          -0.1014005039375377f,
320
62.8M
          -0.4444481661973438f,
321
62.8M
          0.3085497062849487f,
322
62.8M
          0.0000000000000000,
323
62.8M
          -0.0643507165794628f,
324
62.8M
          0.1585450355183970f,
325
62.8M
          -0.5112616136592012f,
326
62.8M
          0.2579236279634129f,
327
62.8M
          0.0000000000000000,
328
62.8M
          -0.0812611176717504f,
329
62.8M
          -0.1856718091610990f,
330
62.8M
          -0.3416446842253373f,
331
62.8M
          0.3302282550303805f,
332
62.8M
          0.0702790691196282f,
333
62.8M
          -0.0741750459581023f,
334
62.8M
      },
335
62.8M
      {
336
62.8M
          0.2500000000000000,
337
62.8M
          -0.1014005039375376f,
338
62.8M
          -0.2929100136981264f,
339
62.8M
          0.0000000000000000,
340
62.8M
          0.0000000000000000,
341
62.8M
          -0.0643507165794627f,
342
62.8M
          0.3935103426921022f,
343
62.8M
          0.0657870154914254f,
344
62.8M
          0.0000000000000000,
345
62.8M
          0.4082482904638634f,
346
62.8M
          0.3078822139579031f,
347
62.8M
          0.3852501370925211f,
348
62.8M
          -0.0857401903551927f,
349
62.8M
          -0.4613374887461554f,
350
62.8M
          0.0000000000000000,
351
62.8M
          0.2191868483885728f,
352
62.8M
      },
353
62.8M
      {
354
62.8M
          0.2500000000000000,
355
62.8M
          -0.1014005039375376f,
356
62.8M
          -0.1137907446044814f,
357
62.8M
          -0.1464291867126654f,
358
62.8M
          0.0000000000000000,
359
62.8M
          -0.0643507165794627f,
360
62.8M
          0.0829816309488214f,
361
62.8M
          0.2388977352334547f,
362
62.8M
          -0.3531238544981624f,
363
62.8M
          0.4082482904638630f,
364
62.8M
          -0.4826689115059858f,
365
62.8M
          -0.1741941265991621f,
366
62.8M
          -0.0476868035022928f,
367
62.8M
          0.1253805944856431f,
368
62.8M
          -0.4326608024727457f,
369
62.8M
          -0.2546827712406641f,
370
62.8M
      },
371
62.8M
      {
372
62.8M
          0.2500000000000000,
373
62.8M
          -0.1014005039375374f,
374
62.8M
          0.0000000000000000,
375
62.8M
          0.4251149611657548f,
376
62.8M
          0.0000000000000000,
377
62.8M
          -0.0643507165794626f,
378
62.8M
          -0.4517556589999480f,
379
62.8M
          0.0000000000000000,
380
62.8M
          -0.6035859033230976f,
381
62.8M
          0.0000000000000000,
382
62.8M
          0.0000000000000000,
383
62.8M
          0.0000000000000000,
384
62.8M
          -0.1426608480880724f,
385
62.8M
          -0.1381354035075845f,
386
62.8M
          0.3487520519930227f,
387
62.8M
          0.1135498731499429f,
388
62.8M
      },
389
62.8M
  };
390
391
62.8M
  const HWY_CAPPED(float, 16) d;
392
188M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
125M
    auto scalar = Zero(d);
394
2.13G
    for (size_t j = 0; j < 16; j++) {
395
2.01G
      auto px = Set(d, pixels[j]);
396
2.01G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.01G
      scalar = MulAdd(px, basis, scalar);
398
2.01G
    }
399
125M
    Store(scalar, d, coeffs + i);
400
125M
  }
401
62.8M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.11M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.11M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.11M
      {
102
1.11M
          0.2500000000000000,
103
1.11M
          0.8769029297991420f,
104
1.11M
          0.0000000000000000,
105
1.11M
          0.0000000000000000,
106
1.11M
          0.0000000000000000,
107
1.11M
          -0.4105377591765233f,
108
1.11M
          0.0000000000000000,
109
1.11M
          0.0000000000000000,
110
1.11M
          0.0000000000000000,
111
1.11M
          0.0000000000000000,
112
1.11M
          0.0000000000000000,
113
1.11M
          0.0000000000000000,
114
1.11M
          0.0000000000000000,
115
1.11M
          0.0000000000000000,
116
1.11M
          0.0000000000000000,
117
1.11M
          0.0000000000000000,
118
1.11M
      },
119
1.11M
      {
120
1.11M
          0.2500000000000000,
121
1.11M
          0.2206518106944235f,
122
1.11M
          0.0000000000000000,
123
1.11M
          0.0000000000000000,
124
1.11M
          -0.7071067811865474f,
125
1.11M
          0.6235485373547691f,
126
1.11M
          0.0000000000000000,
127
1.11M
          0.0000000000000000,
128
1.11M
          0.0000000000000000,
129
1.11M
          0.0000000000000000,
130
1.11M
          0.0000000000000000,
131
1.11M
          0.0000000000000000,
132
1.11M
          0.0000000000000000,
133
1.11M
          0.0000000000000000,
134
1.11M
          0.0000000000000000,
135
1.11M
          0.0000000000000000,
136
1.11M
      },
137
1.11M
      {
138
1.11M
          0.2500000000000000,
139
1.11M
          -0.1014005039375376f,
140
1.11M
          0.4067007583026075f,
141
1.11M
          -0.2125574805828875f,
142
1.11M
          0.0000000000000000,
143
1.11M
          -0.0643507165794627f,
144
1.11M
          -0.4517556589999482f,
145
1.11M
          -0.3046847507248690f,
146
1.11M
          0.3017929516615495f,
147
1.11M
          0.4082482904638627f,
148
1.11M
          0.1747866975480809f,
149
1.11M
          -0.2110560104933578f,
150
1.11M
          -0.1426608480880726f,
151
1.11M
          -0.1381354035075859f,
152
1.11M
          -0.1743760259965107f,
153
1.11M
          0.1135498731499434f,
154
1.11M
      },
155
1.11M
      {
156
1.11M
          0.2500000000000000,
157
1.11M
          -0.1014005039375375f,
158
1.11M
          0.4444481661973445f,
159
1.11M
          0.3085497062849767f,
160
1.11M
          0.0000000000000000f,
161
1.11M
          -0.0643507165794627f,
162
1.11M
          0.1585450355184006f,
163
1.11M
          0.5112616136591823f,
164
1.11M
          0.2579236279634118f,
165
1.11M
          0.0000000000000000,
166
1.11M
          0.0812611176717539f,
167
1.11M
          0.1856718091610980f,
168
1.11M
          -0.3416446842253372f,
169
1.11M
          0.3302282550303788f,
170
1.11M
          0.0702790691196284f,
171
1.11M
          -0.0741750459581035f,
172
1.11M
      },
173
1.11M
      {
174
1.11M
          0.2500000000000000,
175
1.11M
          0.2206518106944236f,
176
1.11M
          0.0000000000000000,
177
1.11M
          0.0000000000000000,
178
1.11M
          0.7071067811865476f,
179
1.11M
          0.6235485373547694f,
180
1.11M
          0.0000000000000000,
181
1.11M
          0.0000000000000000,
182
1.11M
          0.0000000000000000,
183
1.11M
          0.0000000000000000,
184
1.11M
          0.0000000000000000,
185
1.11M
          0.0000000000000000,
186
1.11M
          0.0000000000000000,
187
1.11M
          0.0000000000000000,
188
1.11M
          0.0000000000000000,
189
1.11M
          0.0000000000000000,
190
1.11M
      },
191
1.11M
      {
192
1.11M
          0.2500000000000000,
193
1.11M
          -0.1014005039375378f,
194
1.11M
          0.0000000000000000,
195
1.11M
          0.4706702258572536f,
196
1.11M
          0.0000000000000000,
197
1.11M
          -0.0643507165794628f,
198
1.11M
          -0.0403851516082220f,
199
1.11M
          0.0000000000000000,
200
1.11M
          0.1627234014286620f,
201
1.11M
          0.0000000000000000,
202
1.11M
          0.0000000000000000,
203
1.11M
          0.0000000000000000,
204
1.11M
          0.7367497537172237f,
205
1.11M
          0.0875511500058708f,
206
1.11M
          -0.2921026642334881f,
207
1.11M
          0.1940289303259434f,
208
1.11M
      },
209
1.11M
      {
210
1.11M
          0.2500000000000000,
211
1.11M
          -0.1014005039375377f,
212
1.11M
          0.1957439937204294f,
213
1.11M
          -0.1621205195722993f,
214
1.11M
          0.0000000000000000,
215
1.11M
          -0.0643507165794628f,
216
1.11M
          0.0074182263792424f,
217
1.11M
          -0.2904801297289980f,
218
1.11M
          0.0952002265347504f,
219
1.11M
          0.0000000000000000,
220
1.11M
          -0.3675398009862027f,
221
1.11M
          0.4921585901373873f,
222
1.11M
          0.2462710772207515f,
223
1.11M
          -0.0794670660590957f,
224
1.11M
          0.3623817333531167f,
225
1.11M
          -0.4351904965232280f,
226
1.11M
      },
227
1.11M
      {
228
1.11M
          0.2500000000000000,
229
1.11M
          -0.1014005039375376f,
230
1.11M
          0.2929100136981264f,
231
1.11M
          0.0000000000000000,
232
1.11M
          0.0000000000000000,
233
1.11M
          -0.0643507165794627f,
234
1.11M
          0.3935103426921017f,
235
1.11M
          -0.0657870154914280f,
236
1.11M
          0.0000000000000000,
237
1.11M
          -0.4082482904638628f,
238
1.11M
          -0.3078822139579090f,
239
1.11M
          -0.3852501370925192f,
240
1.11M
          -0.0857401903551931f,
241
1.11M
          -0.4613374887461511f,
242
1.11M
          0.0000000000000000,
243
1.11M
          0.2191868483885747f,
244
1.11M
      },
245
1.11M
      {
246
1.11M
          0.2500000000000000,
247
1.11M
          -0.1014005039375376f,
248
1.11M
          -0.4067007583026072f,
249
1.11M
          -0.2125574805828705f,
250
1.11M
          0.0000000000000000,
251
1.11M
          -0.0643507165794627f,
252
1.11M
          -0.4517556589999464f,
253
1.11M
          0.3046847507248840f,
254
1.11M
          0.3017929516615503f,
255
1.11M
          -0.4082482904638635f,
256
1.11M
          -0.1747866975480813f,
257
1.11M
          0.2110560104933581f,
258
1.11M
          -0.1426608480880734f,
259
1.11M
          -0.1381354035075829f,
260
1.11M
          -0.1743760259965108f,
261
1.11M
          0.1135498731499426f,
262
1.11M
      },
263
1.11M
      {
264
1.11M
          0.2500000000000000,
265
1.11M
          -0.1014005039375377f,
266
1.11M
          -0.1957439937204287f,
267
1.11M
          -0.1621205195722833f,
268
1.11M
          0.0000000000000000,
269
1.11M
          -0.0643507165794628f,
270
1.11M
          0.0074182263792444f,
271
1.11M
          0.2904801297290076f,
272
1.11M
          0.0952002265347505f,
273
1.11M
          0.0000000000000000,
274
1.11M
          0.3675398009862011f,
275
1.11M
          -0.4921585901373891f,
276
1.11M
          0.2462710772207514f,
277
1.11M
          -0.0794670660591026f,
278
1.11M
          0.3623817333531165f,
279
1.11M
          -0.4351904965232251f,
280
1.11M
      },
281
1.11M
      {
282
1.11M
          0.2500000000000000,
283
1.11M
          -0.1014005039375375f,
284
1.11M
          0.0000000000000000,
285
1.11M
          -0.4706702258572528f,
286
1.11M
          0.0000000000000000,
287
1.11M
          -0.0643507165794627f,
288
1.11M
          0.1107416575309343f,
289
1.11M
          0.0000000000000000,
290
1.11M
          -0.1627234014286617f,
291
1.11M
          0.0000000000000000,
292
1.11M
          0.0000000000000000,
293
1.11M
          0.0000000000000000,
294
1.11M
          0.1488339922711357f,
295
1.11M
          0.4972464710953509f,
296
1.11M
          0.2921026642334879f,
297
1.11M
          0.5550443808910661f,
298
1.11M
      },
299
1.11M
      {
300
1.11M
          0.2500000000000000,
301
1.11M
          -0.1014005039375377f,
302
1.11M
          0.1137907446044809f,
303
1.11M
          -0.1464291867126764f,
304
1.11M
          0.0000000000000000,
305
1.11M
          -0.0643507165794628f,
306
1.11M
          0.0829816309488205f,
307
1.11M
          -0.2388977352334460f,
308
1.11M
          -0.3531238544981630f,
309
1.11M
          -0.4082482904638630f,
310
1.11M
          0.4826689115059883f,
311
1.11M
          0.1741941265991622f,
312
1.11M
          -0.0476868035022925f,
313
1.11M
          0.1253805944856366f,
314
1.11M
          -0.4326608024727445f,
315
1.11M
          -0.2546827712406646f,
316
1.11M
      },
317
1.11M
      {
318
1.11M
          0.2500000000000000,
319
1.11M
          -0.1014005039375377f,
320
1.11M
          -0.4444481661973438f,
321
1.11M
          0.3085497062849487f,
322
1.11M
          0.0000000000000000,
323
1.11M
          -0.0643507165794628f,
324
1.11M
          0.1585450355183970f,
325
1.11M
          -0.5112616136592012f,
326
1.11M
          0.2579236279634129f,
327
1.11M
          0.0000000000000000,
328
1.11M
          -0.0812611176717504f,
329
1.11M
          -0.1856718091610990f,
330
1.11M
          -0.3416446842253373f,
331
1.11M
          0.3302282550303805f,
332
1.11M
          0.0702790691196282f,
333
1.11M
          -0.0741750459581023f,
334
1.11M
      },
335
1.11M
      {
336
1.11M
          0.2500000000000000,
337
1.11M
          -0.1014005039375376f,
338
1.11M
          -0.2929100136981264f,
339
1.11M
          0.0000000000000000,
340
1.11M
          0.0000000000000000,
341
1.11M
          -0.0643507165794627f,
342
1.11M
          0.3935103426921022f,
343
1.11M
          0.0657870154914254f,
344
1.11M
          0.0000000000000000,
345
1.11M
          0.4082482904638634f,
346
1.11M
          0.3078822139579031f,
347
1.11M
          0.3852501370925211f,
348
1.11M
          -0.0857401903551927f,
349
1.11M
          -0.4613374887461554f,
350
1.11M
          0.0000000000000000,
351
1.11M
          0.2191868483885728f,
352
1.11M
      },
353
1.11M
      {
354
1.11M
          0.2500000000000000,
355
1.11M
          -0.1014005039375376f,
356
1.11M
          -0.1137907446044814f,
357
1.11M
          -0.1464291867126654f,
358
1.11M
          0.0000000000000000,
359
1.11M
          -0.0643507165794627f,
360
1.11M
          0.0829816309488214f,
361
1.11M
          0.2388977352334547f,
362
1.11M
          -0.3531238544981624f,
363
1.11M
          0.4082482904638630f,
364
1.11M
          -0.4826689115059858f,
365
1.11M
          -0.1741941265991621f,
366
1.11M
          -0.0476868035022928f,
367
1.11M
          0.1253805944856431f,
368
1.11M
          -0.4326608024727457f,
369
1.11M
          -0.2546827712406641f,
370
1.11M
      },
371
1.11M
      {
372
1.11M
          0.2500000000000000,
373
1.11M
          -0.1014005039375374f,
374
1.11M
          0.0000000000000000,
375
1.11M
          0.4251149611657548f,
376
1.11M
          0.0000000000000000,
377
1.11M
          -0.0643507165794626f,
378
1.11M
          -0.4517556589999480f,
379
1.11M
          0.0000000000000000,
380
1.11M
          -0.6035859033230976f,
381
1.11M
          0.0000000000000000,
382
1.11M
          0.0000000000000000,
383
1.11M
          0.0000000000000000,
384
1.11M
          -0.1426608480880724f,
385
1.11M
          -0.1381354035075845f,
386
1.11M
          0.3487520519930227f,
387
1.11M
          0.1135498731499429f,
388
1.11M
      },
389
1.11M
  };
390
391
1.11M
  const HWY_CAPPED(float, 16) d;
392
3.33M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
2.22M
    auto scalar = Zero(d);
394
37.7M
    for (size_t j = 0; j < 16; j++) {
395
35.5M
      auto px = Set(d, pixels[j]);
396
35.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
35.5M
      scalar = MulAdd(px, basis, scalar);
398
35.5M
    }
399
2.22M
    Store(scalar, d, coeffs + i);
400
2.22M
  }
401
1.11M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
60.6M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
60.6M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
60.6M
      {
102
60.6M
          0.2500000000000000,
103
60.6M
          0.8769029297991420f,
104
60.6M
          0.0000000000000000,
105
60.6M
          0.0000000000000000,
106
60.6M
          0.0000000000000000,
107
60.6M
          -0.4105377591765233f,
108
60.6M
          0.0000000000000000,
109
60.6M
          0.0000000000000000,
110
60.6M
          0.0000000000000000,
111
60.6M
          0.0000000000000000,
112
60.6M
          0.0000000000000000,
113
60.6M
          0.0000000000000000,
114
60.6M
          0.0000000000000000,
115
60.6M
          0.0000000000000000,
116
60.6M
          0.0000000000000000,
117
60.6M
          0.0000000000000000,
118
60.6M
      },
119
60.6M
      {
120
60.6M
          0.2500000000000000,
121
60.6M
          0.2206518106944235f,
122
60.6M
          0.0000000000000000,
123
60.6M
          0.0000000000000000,
124
60.6M
          -0.7071067811865474f,
125
60.6M
          0.6235485373547691f,
126
60.6M
          0.0000000000000000,
127
60.6M
          0.0000000000000000,
128
60.6M
          0.0000000000000000,
129
60.6M
          0.0000000000000000,
130
60.6M
          0.0000000000000000,
131
60.6M
          0.0000000000000000,
132
60.6M
          0.0000000000000000,
133
60.6M
          0.0000000000000000,
134
60.6M
          0.0000000000000000,
135
60.6M
          0.0000000000000000,
136
60.6M
      },
137
60.6M
      {
138
60.6M
          0.2500000000000000,
139
60.6M
          -0.1014005039375376f,
140
60.6M
          0.4067007583026075f,
141
60.6M
          -0.2125574805828875f,
142
60.6M
          0.0000000000000000,
143
60.6M
          -0.0643507165794627f,
144
60.6M
          -0.4517556589999482f,
145
60.6M
          -0.3046847507248690f,
146
60.6M
          0.3017929516615495f,
147
60.6M
          0.4082482904638627f,
148
60.6M
          0.1747866975480809f,
149
60.6M
          -0.2110560104933578f,
150
60.6M
          -0.1426608480880726f,
151
60.6M
          -0.1381354035075859f,
152
60.6M
          -0.1743760259965107f,
153
60.6M
          0.1135498731499434f,
154
60.6M
      },
155
60.6M
      {
156
60.6M
          0.2500000000000000,
157
60.6M
          -0.1014005039375375f,
158
60.6M
          0.4444481661973445f,
159
60.6M
          0.3085497062849767f,
160
60.6M
          0.0000000000000000f,
161
60.6M
          -0.0643507165794627f,
162
60.6M
          0.1585450355184006f,
163
60.6M
          0.5112616136591823f,
164
60.6M
          0.2579236279634118f,
165
60.6M
          0.0000000000000000,
166
60.6M
          0.0812611176717539f,
167
60.6M
          0.1856718091610980f,
168
60.6M
          -0.3416446842253372f,
169
60.6M
          0.3302282550303788f,
170
60.6M
          0.0702790691196284f,
171
60.6M
          -0.0741750459581035f,
172
60.6M
      },
173
60.6M
      {
174
60.6M
          0.2500000000000000,
175
60.6M
          0.2206518106944236f,
176
60.6M
          0.0000000000000000,
177
60.6M
          0.0000000000000000,
178
60.6M
          0.7071067811865476f,
179
60.6M
          0.6235485373547694f,
180
60.6M
          0.0000000000000000,
181
60.6M
          0.0000000000000000,
182
60.6M
          0.0000000000000000,
183
60.6M
          0.0000000000000000,
184
60.6M
          0.0000000000000000,
185
60.6M
          0.0000000000000000,
186
60.6M
          0.0000000000000000,
187
60.6M
          0.0000000000000000,
188
60.6M
          0.0000000000000000,
189
60.6M
          0.0000000000000000,
190
60.6M
      },
191
60.6M
      {
192
60.6M
          0.2500000000000000,
193
60.6M
          -0.1014005039375378f,
194
60.6M
          0.0000000000000000,
195
60.6M
          0.4706702258572536f,
196
60.6M
          0.0000000000000000,
197
60.6M
          -0.0643507165794628f,
198
60.6M
          -0.0403851516082220f,
199
60.6M
          0.0000000000000000,
200
60.6M
          0.1627234014286620f,
201
60.6M
          0.0000000000000000,
202
60.6M
          0.0000000000000000,
203
60.6M
          0.0000000000000000,
204
60.6M
          0.7367497537172237f,
205
60.6M
          0.0875511500058708f,
206
60.6M
          -0.2921026642334881f,
207
60.6M
          0.1940289303259434f,
208
60.6M
      },
209
60.6M
      {
210
60.6M
          0.2500000000000000,
211
60.6M
          -0.1014005039375377f,
212
60.6M
          0.1957439937204294f,
213
60.6M
          -0.1621205195722993f,
214
60.6M
          0.0000000000000000,
215
60.6M
          -0.0643507165794628f,
216
60.6M
          0.0074182263792424f,
217
60.6M
          -0.2904801297289980f,
218
60.6M
          0.0952002265347504f,
219
60.6M
          0.0000000000000000,
220
60.6M
          -0.3675398009862027f,
221
60.6M
          0.4921585901373873f,
222
60.6M
          0.2462710772207515f,
223
60.6M
          -0.0794670660590957f,
224
60.6M
          0.3623817333531167f,
225
60.6M
          -0.4351904965232280f,
226
60.6M
      },
227
60.6M
      {
228
60.6M
          0.2500000000000000,
229
60.6M
          -0.1014005039375376f,
230
60.6M
          0.2929100136981264f,
231
60.6M
          0.0000000000000000,
232
60.6M
          0.0000000000000000,
233
60.6M
          -0.0643507165794627f,
234
60.6M
          0.3935103426921017f,
235
60.6M
          -0.0657870154914280f,
236
60.6M
          0.0000000000000000,
237
60.6M
          -0.4082482904638628f,
238
60.6M
          -0.3078822139579090f,
239
60.6M
          -0.3852501370925192f,
240
60.6M
          -0.0857401903551931f,
241
60.6M
          -0.4613374887461511f,
242
60.6M
          0.0000000000000000,
243
60.6M
          0.2191868483885747f,
244
60.6M
      },
245
60.6M
      {
246
60.6M
          0.2500000000000000,
247
60.6M
          -0.1014005039375376f,
248
60.6M
          -0.4067007583026072f,
249
60.6M
          -0.2125574805828705f,
250
60.6M
          0.0000000000000000,
251
60.6M
          -0.0643507165794627f,
252
60.6M
          -0.4517556589999464f,
253
60.6M
          0.3046847507248840f,
254
60.6M
          0.3017929516615503f,
255
60.6M
          -0.4082482904638635f,
256
60.6M
          -0.1747866975480813f,
257
60.6M
          0.2110560104933581f,
258
60.6M
          -0.1426608480880734f,
259
60.6M
          -0.1381354035075829f,
260
60.6M
          -0.1743760259965108f,
261
60.6M
          0.1135498731499426f,
262
60.6M
      },
263
60.6M
      {
264
60.6M
          0.2500000000000000,
265
60.6M
          -0.1014005039375377f,
266
60.6M
          -0.1957439937204287f,
267
60.6M
          -0.1621205195722833f,
268
60.6M
          0.0000000000000000,
269
60.6M
          -0.0643507165794628f,
270
60.6M
          0.0074182263792444f,
271
60.6M
          0.2904801297290076f,
272
60.6M
          0.0952002265347505f,
273
60.6M
          0.0000000000000000,
274
60.6M
          0.3675398009862011f,
275
60.6M
          -0.4921585901373891f,
276
60.6M
          0.2462710772207514f,
277
60.6M
          -0.0794670660591026f,
278
60.6M
          0.3623817333531165f,
279
60.6M
          -0.4351904965232251f,
280
60.6M
      },
281
60.6M
      {
282
60.6M
          0.2500000000000000,
283
60.6M
          -0.1014005039375375f,
284
60.6M
          0.0000000000000000,
285
60.6M
          -0.4706702258572528f,
286
60.6M
          0.0000000000000000,
287
60.6M
          -0.0643507165794627f,
288
60.6M
          0.1107416575309343f,
289
60.6M
          0.0000000000000000,
290
60.6M
          -0.1627234014286617f,
291
60.6M
          0.0000000000000000,
292
60.6M
          0.0000000000000000,
293
60.6M
          0.0000000000000000,
294
60.6M
          0.1488339922711357f,
295
60.6M
          0.4972464710953509f,
296
60.6M
          0.2921026642334879f,
297
60.6M
          0.5550443808910661f,
298
60.6M
      },
299
60.6M
      {
300
60.6M
          0.2500000000000000,
301
60.6M
          -0.1014005039375377f,
302
60.6M
          0.1137907446044809f,
303
60.6M
          -0.1464291867126764f,
304
60.6M
          0.0000000000000000,
305
60.6M
          -0.0643507165794628f,
306
60.6M
          0.0829816309488205f,
307
60.6M
          -0.2388977352334460f,
308
60.6M
          -0.3531238544981630f,
309
60.6M
          -0.4082482904638630f,
310
60.6M
          0.4826689115059883f,
311
60.6M
          0.1741941265991622f,
312
60.6M
          -0.0476868035022925f,
313
60.6M
          0.1253805944856366f,
314
60.6M
          -0.4326608024727445f,
315
60.6M
          -0.2546827712406646f,
316
60.6M
      },
317
60.6M
      {
318
60.6M
          0.2500000000000000,
319
60.6M
          -0.1014005039375377f,
320
60.6M
          -0.4444481661973438f,
321
60.6M
          0.3085497062849487f,
322
60.6M
          0.0000000000000000,
323
60.6M
          -0.0643507165794628f,
324
60.6M
          0.1585450355183970f,
325
60.6M
          -0.5112616136592012f,
326
60.6M
          0.2579236279634129f,
327
60.6M
          0.0000000000000000,
328
60.6M
          -0.0812611176717504f,
329
60.6M
          -0.1856718091610990f,
330
60.6M
          -0.3416446842253373f,
331
60.6M
          0.3302282550303805f,
332
60.6M
          0.0702790691196282f,
333
60.6M
          -0.0741750459581023f,
334
60.6M
      },
335
60.6M
      {
336
60.6M
          0.2500000000000000,
337
60.6M
          -0.1014005039375376f,
338
60.6M
          -0.2929100136981264f,
339
60.6M
          0.0000000000000000,
340
60.6M
          0.0000000000000000,
341
60.6M
          -0.0643507165794627f,
342
60.6M
          0.3935103426921022f,
343
60.6M
          0.0657870154914254f,
344
60.6M
          0.0000000000000000,
345
60.6M
          0.4082482904638634f,
346
60.6M
          0.3078822139579031f,
347
60.6M
          0.3852501370925211f,
348
60.6M
          -0.0857401903551927f,
349
60.6M
          -0.4613374887461554f,
350
60.6M
          0.0000000000000000,
351
60.6M
          0.2191868483885728f,
352
60.6M
      },
353
60.6M
      {
354
60.6M
          0.2500000000000000,
355
60.6M
          -0.1014005039375376f,
356
60.6M
          -0.1137907446044814f,
357
60.6M
          -0.1464291867126654f,
358
60.6M
          0.0000000000000000,
359
60.6M
          -0.0643507165794627f,
360
60.6M
          0.0829816309488214f,
361
60.6M
          0.2388977352334547f,
362
60.6M
          -0.3531238544981624f,
363
60.6M
          0.4082482904638630f,
364
60.6M
          -0.4826689115059858f,
365
60.6M
          -0.1741941265991621f,
366
60.6M
          -0.0476868035022928f,
367
60.6M
          0.1253805944856431f,
368
60.6M
          -0.4326608024727457f,
369
60.6M
          -0.2546827712406641f,
370
60.6M
      },
371
60.6M
      {
372
60.6M
          0.2500000000000000,
373
60.6M
          -0.1014005039375374f,
374
60.6M
          0.0000000000000000,
375
60.6M
          0.4251149611657548f,
376
60.6M
          0.0000000000000000,
377
60.6M
          -0.0643507165794626f,
378
60.6M
          -0.4517556589999480f,
379
60.6M
          0.0000000000000000,
380
60.6M
          -0.6035859033230976f,
381
60.6M
          0.0000000000000000,
382
60.6M
          0.0000000000000000,
383
60.6M
          0.0000000000000000,
384
60.6M
          -0.1426608480880724f,
385
60.6M
          -0.1381354035075845f,
386
60.6M
          0.3487520519930227f,
387
60.6M
          0.1135498731499429f,
388
60.6M
      },
389
60.6M
  };
390
391
60.6M
  const HWY_CAPPED(float, 16) d;
392
181M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
121M
    auto scalar = Zero(d);
394
2.06G
    for (size_t j = 0; j < 16; j++) {
395
1.94G
      auto px = Set(d, pixels[j]);
396
1.94G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
1.94G
      scalar = MulAdd(px, basis, scalar);
398
1.94G
    }
399
121M
    Store(scalar, d, coeffs + i);
400
121M
  }
401
60.6M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.11M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.11M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.11M
      {
102
1.11M
          0.2500000000000000,
103
1.11M
          0.8769029297991420f,
104
1.11M
          0.0000000000000000,
105
1.11M
          0.0000000000000000,
106
1.11M
          0.0000000000000000,
107
1.11M
          -0.4105377591765233f,
108
1.11M
          0.0000000000000000,
109
1.11M
          0.0000000000000000,
110
1.11M
          0.0000000000000000,
111
1.11M
          0.0000000000000000,
112
1.11M
          0.0000000000000000,
113
1.11M
          0.0000000000000000,
114
1.11M
          0.0000000000000000,
115
1.11M
          0.0000000000000000,
116
1.11M
          0.0000000000000000,
117
1.11M
          0.0000000000000000,
118
1.11M
      },
119
1.11M
      {
120
1.11M
          0.2500000000000000,
121
1.11M
          0.2206518106944235f,
122
1.11M
          0.0000000000000000,
123
1.11M
          0.0000000000000000,
124
1.11M
          -0.7071067811865474f,
125
1.11M
          0.6235485373547691f,
126
1.11M
          0.0000000000000000,
127
1.11M
          0.0000000000000000,
128
1.11M
          0.0000000000000000,
129
1.11M
          0.0000000000000000,
130
1.11M
          0.0000000000000000,
131
1.11M
          0.0000000000000000,
132
1.11M
          0.0000000000000000,
133
1.11M
          0.0000000000000000,
134
1.11M
          0.0000000000000000,
135
1.11M
          0.0000000000000000,
136
1.11M
      },
137
1.11M
      {
138
1.11M
          0.2500000000000000,
139
1.11M
          -0.1014005039375376f,
140
1.11M
          0.4067007583026075f,
141
1.11M
          -0.2125574805828875f,
142
1.11M
          0.0000000000000000,
143
1.11M
          -0.0643507165794627f,
144
1.11M
          -0.4517556589999482f,
145
1.11M
          -0.3046847507248690f,
146
1.11M
          0.3017929516615495f,
147
1.11M
          0.4082482904638627f,
148
1.11M
          0.1747866975480809f,
149
1.11M
          -0.2110560104933578f,
150
1.11M
          -0.1426608480880726f,
151
1.11M
          -0.1381354035075859f,
152
1.11M
          -0.1743760259965107f,
153
1.11M
          0.1135498731499434f,
154
1.11M
      },
155
1.11M
      {
156
1.11M
          0.2500000000000000,
157
1.11M
          -0.1014005039375375f,
158
1.11M
          0.4444481661973445f,
159
1.11M
          0.3085497062849767f,
160
1.11M
          0.0000000000000000f,
161
1.11M
          -0.0643507165794627f,
162
1.11M
          0.1585450355184006f,
163
1.11M
          0.5112616136591823f,
164
1.11M
          0.2579236279634118f,
165
1.11M
          0.0000000000000000,
166
1.11M
          0.0812611176717539f,
167
1.11M
          0.1856718091610980f,
168
1.11M
          -0.3416446842253372f,
169
1.11M
          0.3302282550303788f,
170
1.11M
          0.0702790691196284f,
171
1.11M
          -0.0741750459581035f,
172
1.11M
      },
173
1.11M
      {
174
1.11M
          0.2500000000000000,
175
1.11M
          0.2206518106944236f,
176
1.11M
          0.0000000000000000,
177
1.11M
          0.0000000000000000,
178
1.11M
          0.7071067811865476f,
179
1.11M
          0.6235485373547694f,
180
1.11M
          0.0000000000000000,
181
1.11M
          0.0000000000000000,
182
1.11M
          0.0000000000000000,
183
1.11M
          0.0000000000000000,
184
1.11M
          0.0000000000000000,
185
1.11M
          0.0000000000000000,
186
1.11M
          0.0000000000000000,
187
1.11M
          0.0000000000000000,
188
1.11M
          0.0000000000000000,
189
1.11M
          0.0000000000000000,
190
1.11M
      },
191
1.11M
      {
192
1.11M
          0.2500000000000000,
193
1.11M
          -0.1014005039375378f,
194
1.11M
          0.0000000000000000,
195
1.11M
          0.4706702258572536f,
196
1.11M
          0.0000000000000000,
197
1.11M
          -0.0643507165794628f,
198
1.11M
          -0.0403851516082220f,
199
1.11M
          0.0000000000000000,
200
1.11M
          0.1627234014286620f,
201
1.11M
          0.0000000000000000,
202
1.11M
          0.0000000000000000,
203
1.11M
          0.0000000000000000,
204
1.11M
          0.7367497537172237f,
205
1.11M
          0.0875511500058708f,
206
1.11M
          -0.2921026642334881f,
207
1.11M
          0.1940289303259434f,
208
1.11M
      },
209
1.11M
      {
210
1.11M
          0.2500000000000000,
211
1.11M
          -0.1014005039375377f,
212
1.11M
          0.1957439937204294f,
213
1.11M
          -0.1621205195722993f,
214
1.11M
          0.0000000000000000,
215
1.11M
          -0.0643507165794628f,
216
1.11M
          0.0074182263792424f,
217
1.11M
          -0.2904801297289980f,
218
1.11M
          0.0952002265347504f,
219
1.11M
          0.0000000000000000,
220
1.11M
          -0.3675398009862027f,
221
1.11M
          0.4921585901373873f,
222
1.11M
          0.2462710772207515f,
223
1.11M
          -0.0794670660590957f,
224
1.11M
          0.3623817333531167f,
225
1.11M
          -0.4351904965232280f,
226
1.11M
      },
227
1.11M
      {
228
1.11M
          0.2500000000000000,
229
1.11M
          -0.1014005039375376f,
230
1.11M
          0.2929100136981264f,
231
1.11M
          0.0000000000000000,
232
1.11M
          0.0000000000000000,
233
1.11M
          -0.0643507165794627f,
234
1.11M
          0.3935103426921017f,
235
1.11M
          -0.0657870154914280f,
236
1.11M
          0.0000000000000000,
237
1.11M
          -0.4082482904638628f,
238
1.11M
          -0.3078822139579090f,
239
1.11M
          -0.3852501370925192f,
240
1.11M
          -0.0857401903551931f,
241
1.11M
          -0.4613374887461511f,
242
1.11M
          0.0000000000000000,
243
1.11M
          0.2191868483885747f,
244
1.11M
      },
245
1.11M
      {
246
1.11M
          0.2500000000000000,
247
1.11M
          -0.1014005039375376f,
248
1.11M
          -0.4067007583026072f,
249
1.11M
          -0.2125574805828705f,
250
1.11M
          0.0000000000000000,
251
1.11M
          -0.0643507165794627f,
252
1.11M
          -0.4517556589999464f,
253
1.11M
          0.3046847507248840f,
254
1.11M
          0.3017929516615503f,
255
1.11M
          -0.4082482904638635f,
256
1.11M
          -0.1747866975480813f,
257
1.11M
          0.2110560104933581f,
258
1.11M
          -0.1426608480880734f,
259
1.11M
          -0.1381354035075829f,
260
1.11M
          -0.1743760259965108f,
261
1.11M
          0.1135498731499426f,
262
1.11M
      },
263
1.11M
      {
264
1.11M
          0.2500000000000000,
265
1.11M
          -0.1014005039375377f,
266
1.11M
          -0.1957439937204287f,
267
1.11M
          -0.1621205195722833f,
268
1.11M
          0.0000000000000000,
269
1.11M
          -0.0643507165794628f,
270
1.11M
          0.0074182263792444f,
271
1.11M
          0.2904801297290076f,
272
1.11M
          0.0952002265347505f,
273
1.11M
          0.0000000000000000,
274
1.11M
          0.3675398009862011f,
275
1.11M
          -0.4921585901373891f,
276
1.11M
          0.2462710772207514f,
277
1.11M
          -0.0794670660591026f,
278
1.11M
          0.3623817333531165f,
279
1.11M
          -0.4351904965232251f,
280
1.11M
      },
281
1.11M
      {
282
1.11M
          0.2500000000000000,
283
1.11M
          -0.1014005039375375f,
284
1.11M
          0.0000000000000000,
285
1.11M
          -0.4706702258572528f,
286
1.11M
          0.0000000000000000,
287
1.11M
          -0.0643507165794627f,
288
1.11M
          0.1107416575309343f,
289
1.11M
          0.0000000000000000,
290
1.11M
          -0.1627234014286617f,
291
1.11M
          0.0000000000000000,
292
1.11M
          0.0000000000000000,
293
1.11M
          0.0000000000000000,
294
1.11M
          0.1488339922711357f,
295
1.11M
          0.4972464710953509f,
296
1.11M
          0.2921026642334879f,
297
1.11M
          0.5550443808910661f,
298
1.11M
      },
299
1.11M
      {
300
1.11M
          0.2500000000000000,
301
1.11M
          -0.1014005039375377f,
302
1.11M
          0.1137907446044809f,
303
1.11M
          -0.1464291867126764f,
304
1.11M
          0.0000000000000000,
305
1.11M
          -0.0643507165794628f,
306
1.11M
          0.0829816309488205f,
307
1.11M
          -0.2388977352334460f,
308
1.11M
          -0.3531238544981630f,
309
1.11M
          -0.4082482904638630f,
310
1.11M
          0.4826689115059883f,
311
1.11M
          0.1741941265991622f,
312
1.11M
          -0.0476868035022925f,
313
1.11M
          0.1253805944856366f,
314
1.11M
          -0.4326608024727445f,
315
1.11M
          -0.2546827712406646f,
316
1.11M
      },
317
1.11M
      {
318
1.11M
          0.2500000000000000,
319
1.11M
          -0.1014005039375377f,
320
1.11M
          -0.4444481661973438f,
321
1.11M
          0.3085497062849487f,
322
1.11M
          0.0000000000000000,
323
1.11M
          -0.0643507165794628f,
324
1.11M
          0.1585450355183970f,
325
1.11M
          -0.5112616136592012f,
326
1.11M
          0.2579236279634129f,
327
1.11M
          0.0000000000000000,
328
1.11M
          -0.0812611176717504f,
329
1.11M
          -0.1856718091610990f,
330
1.11M
          -0.3416446842253373f,
331
1.11M
          0.3302282550303805f,
332
1.11M
          0.0702790691196282f,
333
1.11M
          -0.0741750459581023f,
334
1.11M
      },
335
1.11M
      {
336
1.11M
          0.2500000000000000,
337
1.11M
          -0.1014005039375376f,
338
1.11M
          -0.2929100136981264f,
339
1.11M
          0.0000000000000000,
340
1.11M
          0.0000000000000000,
341
1.11M
          -0.0643507165794627f,
342
1.11M
          0.3935103426921022f,
343
1.11M
          0.0657870154914254f,
344
1.11M
          0.0000000000000000,
345
1.11M
          0.4082482904638634f,
346
1.11M
          0.3078822139579031f,
347
1.11M
          0.3852501370925211f,
348
1.11M
          -0.0857401903551927f,
349
1.11M
          -0.4613374887461554f,
350
1.11M
          0.0000000000000000,
351
1.11M
          0.2191868483885728f,
352
1.11M
      },
353
1.11M
      {
354
1.11M
          0.2500000000000000,
355
1.11M
          -0.1014005039375376f,
356
1.11M
          -0.1137907446044814f,
357
1.11M
          -0.1464291867126654f,
358
1.11M
          0.0000000000000000,
359
1.11M
          -0.0643507165794627f,
360
1.11M
          0.0829816309488214f,
361
1.11M
          0.2388977352334547f,
362
1.11M
          -0.3531238544981624f,
363
1.11M
          0.4082482904638630f,
364
1.11M
          -0.4826689115059858f,
365
1.11M
          -0.1741941265991621f,
366
1.11M
          -0.0476868035022928f,
367
1.11M
          0.1253805944856431f,
368
1.11M
          -0.4326608024727457f,
369
1.11M
          -0.2546827712406641f,
370
1.11M
      },
371
1.11M
      {
372
1.11M
          0.2500000000000000,
373
1.11M
          -0.1014005039375374f,
374
1.11M
          0.0000000000000000,
375
1.11M
          0.4251149611657548f,
376
1.11M
          0.0000000000000000,
377
1.11M
          -0.0643507165794626f,
378
1.11M
          -0.4517556589999480f,
379
1.11M
          0.0000000000000000,
380
1.11M
          -0.6035859033230976f,
381
1.11M
          0.0000000000000000,
382
1.11M
          0.0000000000000000,
383
1.11M
          0.0000000000000000,
384
1.11M
          -0.1426608480880724f,
385
1.11M
          -0.1381354035075845f,
386
1.11M
          0.3487520519930227f,
387
1.11M
          0.1135498731499429f,
388
1.11M
      },
389
1.11M
  };
390
391
1.11M
  const HWY_CAPPED(float, 16) d;
392
3.33M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
2.22M
    auto scalar = Zero(d);
394
37.7M
    for (size_t j = 0; j < 16; j++) {
395
35.5M
      auto px = Set(d, pixels[j]);
396
35.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
35.5M
      scalar = MulAdd(px, basis, scalar);
398
35.5M
    }
399
2.22M
    Store(scalar, d, coeffs + i);
400
2.22M
  }
401
1.11M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
62.8M
                            float* JXL_RESTRICT coefficients) {
411
62.8M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
62.8M
  size_t afv_x = afv_kind & 1;
413
62.8M
  size_t afv_y = afv_kind / 2;
414
62.8M
  HWY_ALIGN float block[4 * 8] = {};
415
314M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.25G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.00G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.00G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.00G
    }
420
251M
  }
421
  // AFV coefficients in (even, even) positions.
422
62.8M
  HWY_ALIGN float coeff[4 * 4];
423
62.8M
  AFVDCT4x4(block, coeff);
424
314M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.25G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.00G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.00G
    }
428
251M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
62.8M
  ComputeScaledDCT<4, 4>()(
431
62.8M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
62.8M
              pixels_stride),
433
62.8M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
314M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.26G
    for (size_t ix = 0; ix < 8; ix++) {
437
2.01G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.01G
    }
439
251M
  }
440
  // 4x8 DCT of the other half of the block.
441
62.8M
  ComputeScaledDCT<4, 8>()(
442
62.8M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
62.8M
      block, scratch_space);
444
314M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.26G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.01G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.01G
    }
448
251M
  }
449
62.8M
  float block00 = coefficients[0] * 0.25f;
450
62.8M
  float block01 = coefficients[1];
451
62.8M
  float block10 = coefficients[8];
452
62.8M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
62.8M
  coefficients[1] = (block00 - block01) * 0.5f;
454
62.8M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
62.8M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
247k
                            float* JXL_RESTRICT coefficients) {
411
247k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
247k
  size_t afv_x = afv_kind & 1;
413
247k
  size_t afv_y = afv_kind / 2;
414
247k
  HWY_ALIGN float block[4 * 8] = {};
415
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.95M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.96M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.96M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.96M
    }
420
991k
  }
421
  // AFV coefficients in (even, even) positions.
422
247k
  HWY_ALIGN float coeff[4 * 4];
423
247k
  AFVDCT4x4(block, coeff);
424
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.95M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.96M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.96M
    }
428
991k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
247k
  ComputeScaledDCT<4, 4>()(
431
247k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
247k
              pixels_stride),
433
247k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.91M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.92M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.92M
    }
439
991k
  }
440
  // 4x8 DCT of the other half of the block.
441
247k
  ComputeScaledDCT<4, 8>()(
442
247k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
247k
      block, scratch_space);
444
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.91M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.92M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.92M
    }
448
991k
  }
449
247k
  float block00 = coefficients[0] * 0.25f;
450
247k
  float block01 = coefficients[1];
451
247k
  float block10 = coefficients[8];
452
247k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
247k
  coefficients[1] = (block00 - block01) * 0.5f;
454
247k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
247k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
364k
                            float* JXL_RESTRICT coefficients) {
411
364k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
364k
  size_t afv_x = afv_kind & 1;
413
364k
  size_t afv_y = afv_kind / 2;
414
364k
  HWY_ALIGN float block[4 * 8] = {};
415
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.29M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.83M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.83M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.83M
    }
420
1.45M
  }
421
  // AFV coefficients in (even, even) positions.
422
364k
  HWY_ALIGN float coeff[4 * 4];
423
364k
  AFVDCT4x4(block, coeff);
424
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.29M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.83M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.83M
    }
428
1.45M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
364k
  ComputeScaledDCT<4, 4>()(
431
364k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
364k
              pixels_stride),
433
364k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.6M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.6M
    }
439
1.45M
  }
440
  // 4x8 DCT of the other half of the block.
441
364k
  ComputeScaledDCT<4, 8>()(
442
364k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
364k
      block, scratch_space);
444
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.6M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.6M
    }
448
1.45M
  }
449
364k
  float block00 = coefficients[0] * 0.25f;
450
364k
  float block01 = coefficients[1];
451
364k
  float block10 = coefficients[8];
452
364k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
364k
  coefficients[1] = (block00 - block01) * 0.5f;
454
364k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
364k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
226k
                            float* JXL_RESTRICT coefficients) {
411
226k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
226k
  size_t afv_x = afv_kind & 1;
413
226k
  size_t afv_y = afv_kind / 2;
414
226k
  HWY_ALIGN float block[4 * 8] = {};
415
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.53M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.62M
    }
420
906k
  }
421
  // AFV coefficients in (even, even) positions.
422
226k
  HWY_ALIGN float coeff[4 * 4];
423
226k
  AFVDCT4x4(block, coeff);
424
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.53M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.62M
    }
428
906k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
226k
  ComputeScaledDCT<4, 4>()(
431
226k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
226k
              pixels_stride),
433
226k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.15M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.25M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.25M
    }
439
906k
  }
440
  // 4x8 DCT of the other half of the block.
441
226k
  ComputeScaledDCT<4, 8>()(
442
226k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
226k
      block, scratch_space);
444
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.15M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.25M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.25M
    }
448
906k
  }
449
226k
  float block00 = coefficients[0] * 0.25f;
450
226k
  float block01 = coefficients[1];
451
226k
  float block10 = coefficients[8];
452
226k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
226k
  coefficients[1] = (block00 - block01) * 0.5f;
454
226k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
226k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
270k
                            float* JXL_RESTRICT coefficients) {
411
270k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
270k
  size_t afv_x = afv_kind & 1;
413
270k
  size_t afv_y = afv_kind / 2;
414
270k
  HWY_ALIGN float block[4 * 8] = {};
415
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.41M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.33M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.33M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.33M
    }
420
1.08M
  }
421
  // AFV coefficients in (even, even) positions.
422
270k
  HWY_ALIGN float coeff[4 * 4];
423
270k
  AFVDCT4x4(block, coeff);
424
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.41M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.33M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.33M
    }
428
1.08M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
270k
  ComputeScaledDCT<4, 4>()(
431
270k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
270k
              pixels_stride),
433
270k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.74M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.66M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.66M
    }
439
1.08M
  }
440
  // 4x8 DCT of the other half of the block.
441
270k
  ComputeScaledDCT<4, 8>()(
442
270k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
270k
      block, scratch_space);
444
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.74M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.66M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.66M
    }
448
1.08M
  }
449
270k
  float block00 = coefficients[0] * 0.25f;
450
270k
  float block01 = coefficients[1];
451
270k
  float block10 = coefficients[8];
452
270k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
270k
  coefficients[1] = (block00 - block01) * 0.5f;
454
270k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
270k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.1M
                            float* JXL_RESTRICT coefficients) {
411
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.1M
  size_t afv_x = afv_kind & 1;
413
15.1M
  size_t afv_y = afv_kind / 2;
414
15.1M
  HWY_ALIGN float block[4 * 8] = {};
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
242M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
242M
    }
420
60.6M
  }
421
  // AFV coefficients in (even, even) positions.
422
15.1M
  HWY_ALIGN float coeff[4 * 4];
423
15.1M
  AFVDCT4x4(block, coeff);
424
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
425
303M
    for (size_t ix = 0; ix < 4; ix++) {
426
242M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
242M
    }
428
60.6M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.1M
  ComputeScaledDCT<4, 4>()(
431
15.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.1M
              pixels_stride),
433
15.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
436
545M
    for (size_t ix = 0; ix < 8; ix++) {
437
485M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
485M
    }
439
60.6M
  }
440
  // 4x8 DCT of the other half of the block.
441
15.1M
  ComputeScaledDCT<4, 8>()(
442
15.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.1M
      block, scratch_space);
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
485M
    }
448
60.6M
  }
449
15.1M
  float block00 = coefficients[0] * 0.25f;
450
15.1M
  float block01 = coefficients[1];
451
15.1M
  float block10 = coefficients[8];
452
15.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.1M
                            float* JXL_RESTRICT coefficients) {
411
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.1M
  size_t afv_x = afv_kind & 1;
413
15.1M
  size_t afv_y = afv_kind / 2;
414
15.1M
  HWY_ALIGN float block[4 * 8] = {};
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
242M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
242M
    }
420
60.6M
  }
421
  // AFV coefficients in (even, even) positions.
422
15.1M
  HWY_ALIGN float coeff[4 * 4];
423
15.1M
  AFVDCT4x4(block, coeff);
424
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
425
303M
    for (size_t ix = 0; ix < 4; ix++) {
426
242M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
242M
    }
428
60.6M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.1M
  ComputeScaledDCT<4, 4>()(
431
15.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.1M
              pixels_stride),
433
15.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
436
545M
    for (size_t ix = 0; ix < 8; ix++) {
437
485M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
485M
    }
439
60.6M
  }
440
  // 4x8 DCT of the other half of the block.
441
15.1M
  ComputeScaledDCT<4, 8>()(
442
15.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.1M
      block, scratch_space);
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
485M
    }
448
60.6M
  }
449
15.1M
  float block00 = coefficients[0] * 0.25f;
450
15.1M
  float block01 = coefficients[1];
451
15.1M
  float block10 = coefficients[8];
452
15.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.1M
                            float* JXL_RESTRICT coefficients) {
411
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.1M
  size_t afv_x = afv_kind & 1;
413
15.1M
  size_t afv_y = afv_kind / 2;
414
15.1M
  HWY_ALIGN float block[4 * 8] = {};
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
242M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
242M
    }
420
60.6M
  }
421
  // AFV coefficients in (even, even) positions.
422
15.1M
  HWY_ALIGN float coeff[4 * 4];
423
15.1M
  AFVDCT4x4(block, coeff);
424
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
425
303M
    for (size_t ix = 0; ix < 4; ix++) {
426
242M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
242M
    }
428
60.6M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.1M
  ComputeScaledDCT<4, 4>()(
431
15.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.1M
              pixels_stride),
433
15.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
436
545M
    for (size_t ix = 0; ix < 8; ix++) {
437
485M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
485M
    }
439
60.6M
  }
440
  // 4x8 DCT of the other half of the block.
441
15.1M
  ComputeScaledDCT<4, 8>()(
442
15.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.1M
      block, scratch_space);
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
485M
    }
448
60.6M
  }
449
15.1M
  float block00 = coefficients[0] * 0.25f;
450
15.1M
  float block01 = coefficients[1];
451
15.1M
  float block10 = coefficients[8];
452
15.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.1M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
15.1M
                            float* JXL_RESTRICT coefficients) {
411
15.1M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
15.1M
  size_t afv_x = afv_kind & 1;
413
15.1M
  size_t afv_y = afv_kind / 2;
414
15.1M
  HWY_ALIGN float block[4 * 8] = {};
415
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
416
303M
    for (size_t ix = 0; ix < 4; ix++) {
417
242M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
242M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
242M
    }
420
60.6M
  }
421
  // AFV coefficients in (even, even) positions.
422
15.1M
  HWY_ALIGN float coeff[4 * 4];
423
15.1M
  AFVDCT4x4(block, coeff);
424
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
425
303M
    for (size_t ix = 0; ix < 4; ix++) {
426
242M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
242M
    }
428
60.6M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
15.1M
  ComputeScaledDCT<4, 4>()(
431
15.1M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
15.1M
              pixels_stride),
433
15.1M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
436
545M
    for (size_t ix = 0; ix < 8; ix++) {
437
485M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
485M
    }
439
60.6M
  }
440
  // 4x8 DCT of the other half of the block.
441
15.1M
  ComputeScaledDCT<4, 8>()(
442
15.1M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
15.1M
      block, scratch_space);
444
75.8M
  for (size_t iy = 0; iy < 4; iy++) {
445
545M
    for (size_t ix = 0; ix < 8; ix++) {
446
485M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
485M
    }
448
60.6M
  }
449
15.1M
  float block00 = coefficients[0] * 0.25f;
450
15.1M
  float block01 = coefficients[1];
451
15.1M
  float block10 = coefficients[8];
452
15.1M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
15.1M
  coefficients[1] = (block00 - block01) * 0.5f;
454
15.1M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
15.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
247k
                            float* JXL_RESTRICT coefficients) {
411
247k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
247k
  size_t afv_x = afv_kind & 1;
413
247k
  size_t afv_y = afv_kind / 2;
414
247k
  HWY_ALIGN float block[4 * 8] = {};
415
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.95M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.96M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.96M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.96M
    }
420
991k
  }
421
  // AFV coefficients in (even, even) positions.
422
247k
  HWY_ALIGN float coeff[4 * 4];
423
247k
  AFVDCT4x4(block, coeff);
424
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.95M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.96M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.96M
    }
428
991k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
247k
  ComputeScaledDCT<4, 4>()(
431
247k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
247k
              pixels_stride),
433
247k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.91M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.92M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.92M
    }
439
991k
  }
440
  // 4x8 DCT of the other half of the block.
441
247k
  ComputeScaledDCT<4, 8>()(
442
247k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
247k
      block, scratch_space);
444
1.23M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.91M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.92M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.92M
    }
448
991k
  }
449
247k
  float block00 = coefficients[0] * 0.25f;
450
247k
  float block01 = coefficients[1];
451
247k
  float block10 = coefficients[8];
452
247k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
247k
  coefficients[1] = (block00 - block01) * 0.5f;
454
247k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
247k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
364k
                            float* JXL_RESTRICT coefficients) {
411
364k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
364k
  size_t afv_x = afv_kind & 1;
413
364k
  size_t afv_y = afv_kind / 2;
414
364k
  HWY_ALIGN float block[4 * 8] = {};
415
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.29M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.83M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.83M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.83M
    }
420
1.45M
  }
421
  // AFV coefficients in (even, even) positions.
422
364k
  HWY_ALIGN float coeff[4 * 4];
423
364k
  AFVDCT4x4(block, coeff);
424
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.29M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.83M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.83M
    }
428
1.45M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
364k
  ComputeScaledDCT<4, 4>()(
431
364k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
364k
              pixels_stride),
433
364k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.1M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.6M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.6M
    }
439
1.45M
  }
440
  // 4x8 DCT of the other half of the block.
441
364k
  ComputeScaledDCT<4, 8>()(
442
364k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
364k
      block, scratch_space);
444
1.82M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.1M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.6M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.6M
    }
448
1.45M
  }
449
364k
  float block00 = coefficients[0] * 0.25f;
450
364k
  float block01 = coefficients[1];
451
364k
  float block10 = coefficients[8];
452
364k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
364k
  coefficients[1] = (block00 - block01) * 0.5f;
454
364k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
364k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
226k
                            float* JXL_RESTRICT coefficients) {
411
226k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
226k
  size_t afv_x = afv_kind & 1;
413
226k
  size_t afv_y = afv_kind / 2;
414
226k
  HWY_ALIGN float block[4 * 8] = {};
415
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.53M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.62M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.62M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.62M
    }
420
906k
  }
421
  // AFV coefficients in (even, even) positions.
422
226k
  HWY_ALIGN float coeff[4 * 4];
423
226k
  AFVDCT4x4(block, coeff);
424
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.53M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.62M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.62M
    }
428
906k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
226k
  ComputeScaledDCT<4, 4>()(
431
226k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
226k
              pixels_stride),
433
226k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.15M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.25M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.25M
    }
439
906k
  }
440
  // 4x8 DCT of the other half of the block.
441
226k
  ComputeScaledDCT<4, 8>()(
442
226k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
226k
      block, scratch_space);
444
1.13M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.15M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.25M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.25M
    }
448
906k
  }
449
226k
  float block00 = coefficients[0] * 0.25f;
450
226k
  float block01 = coefficients[1];
451
226k
  float block10 = coefficients[8];
452
226k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
226k
  coefficients[1] = (block00 - block01) * 0.5f;
454
226k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
226k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
270k
                            float* JXL_RESTRICT coefficients) {
411
270k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
270k
  size_t afv_x = afv_kind & 1;
413
270k
  size_t afv_y = afv_kind / 2;
414
270k
  HWY_ALIGN float block[4 * 8] = {};
415
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.41M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.33M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.33M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.33M
    }
420
1.08M
  }
421
  // AFV coefficients in (even, even) positions.
422
270k
  HWY_ALIGN float coeff[4 * 4];
423
270k
  AFVDCT4x4(block, coeff);
424
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.41M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.33M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.33M
    }
428
1.08M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
270k
  ComputeScaledDCT<4, 4>()(
431
270k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
270k
              pixels_stride),
433
270k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
436
9.74M
    for (size_t ix = 0; ix < 8; ix++) {
437
8.66M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
8.66M
    }
439
1.08M
  }
440
  // 4x8 DCT of the other half of the block.
441
270k
  ComputeScaledDCT<4, 8>()(
442
270k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
270k
      block, scratch_space);
444
1.35M
  for (size_t iy = 0; iy < 4; iy++) {
445
9.74M
    for (size_t ix = 0; ix < 8; ix++) {
446
8.66M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
8.66M
    }
448
1.08M
  }
449
270k
  float block00 = coefficients[0] * 0.25f;
450
270k
  float block01 = coefficients[1];
451
270k
  float block10 = coefficients[8];
452
270k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
270k
  coefficients[1] = (block00 - block01) * 0.5f;
454
270k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
270k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
221M
                                          float* JXL_RESTRICT scratch_space) {
462
221M
  using Type = AcStrategyType;
463
221M
  switch (strategy) {
464
17.2M
    case Type::IDENTITY: {
465
51.7M
      for (size_t y = 0; y < 2; y++) {
466
103M
        for (size_t x = 0; x < 2; x++) {
467
69.0M
          float block_dc = 0;
468
345M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.10G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.10G
            }
472
276M
          }
473
69.0M
          block_dc *= 1.0f / 16;
474
345M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.10G
              if (ix == 1 && iy == 1) continue;
477
1.03G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.03G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.03G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.03G
            }
481
276M
          }
482
69.0M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
69.0M
          coefficients[y * 8 + x] = block_dc;
484
69.0M
        }
485
34.5M
      }
486
17.2M
      float block00 = coefficients[0];
487
17.2M
      float block01 = coefficients[1];
488
17.2M
      float block10 = coefficients[8];
489
17.2M
      float block11 = coefficients[9];
490
17.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
17.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
17.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
17.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
17.2M
      break;
495
0
    }
496
16.0M
    case Type::DCT8X4: {
497
48.1M
      for (size_t x = 0; x < 2; x++) {
498
32.1M
        HWY_ALIGN float block[4 * 8];
499
32.1M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
32.1M
                                 scratch_space);
501
160M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.15G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.02G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.02G
          }
506
128M
        }
507
32.1M
      }
508
16.0M
      float block0 = coefficients[0];
509
16.0M
      float block1 = coefficients[8];
510
16.0M
      coefficients[0] = (block0 + block1) * 0.5f;
511
16.0M
      coefficients[8] = (block0 - block1) * 0.5f;
512
16.0M
      break;
513
0
    }
514
15.7M
    case Type::DCT4X8: {
515
47.2M
      for (size_t y = 0; y < 2; y++) {
516
31.4M
        HWY_ALIGN float block[4 * 8];
517
31.4M
        ComputeScaledDCT<4, 8>()(
518
31.4M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
31.4M
            scratch_space);
520
157M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.13G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.00G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.00G
          }
524
125M
        }
525
31.4M
      }
526
15.7M
      float block0 = coefficients[0];
527
15.7M
      float block1 = coefficients[8];
528
15.7M
      coefficients[0] = (block0 + block1) * 0.5f;
529
15.7M
      coefficients[8] = (block0 - block1) * 0.5f;
530
15.7M
      break;
531
0
    }
532
15.1M
    case Type::DCT4X4: {
533
45.5M
      for (size_t y = 0; y < 2; y++) {
534
91.0M
        for (size_t x = 0; x < 2; x++) {
535
60.6M
          HWY_ALIGN float block[4 * 4];
536
60.6M
          ComputeScaledDCT<4, 4>()(
537
60.6M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
60.6M
              block, scratch_space);
539
303M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
541
970M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
970M
            }
543
242M
          }
544
60.6M
        }
545
30.3M
      }
546
15.1M
      float block00 = coefficients[0];
547
15.1M
      float block01 = coefficients[1];
548
15.1M
      float block10 = coefficients[8];
549
15.1M
      float block11 = coefficients[9];
550
15.1M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
15.1M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
15.1M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
15.1M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
15.1M
      break;
555
0
    }
556
17.2M
    case Type::DCT2X2: {
557
17.2M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
17.2M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
17.2M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
17.2M
      break;
561
0
    }
562
6.74M
    case Type::DCT16X16: {
563
6.74M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.74M
                                 scratch_space);
565
6.74M
      break;
566
0
    }
567
13.0M
    case Type::DCT16X8: {
568
13.0M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
13.0M
                                scratch_space);
570
13.0M
      break;
571
0
    }
572
13.3M
    case Type::DCT8X16: {
573
13.3M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
13.3M
                                scratch_space);
575
13.3M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.61M
    case Type::DCT32X16: {
588
2.61M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.61M
                                 scratch_space);
590
2.61M
      break;
591
0
    }
592
2.72M
    case Type::DCT16X32: {
593
2.72M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.72M
                                 scratch_space);
595
2.72M
      break;
596
0
    }
597
1.55M
    case Type::DCT32X32: {
598
1.55M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.55M
                                 scratch_space);
600
1.55M
      break;
601
0
    }
602
35.7M
    case Type::DCT: {
603
35.7M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
35.7M
                               scratch_space);
605
35.7M
      break;
606
0
    }
607
15.6M
    case Type::AFV0: {
608
15.6M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
15.6M
      break;
610
0
    }
611
15.8M
    case Type::AFV1: {
612
15.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
15.8M
      break;
614
0
    }
615
15.6M
    case Type::AFV2: {
616
15.6M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
15.6M
      break;
618
0
    }
619
15.7M
    case Type::AFV3: {
620
15.7M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
15.7M
      break;
622
0
    }
623
266k
    case Type::DCT64X64: {
624
266k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
266k
                                 scratch_space);
626
266k
      break;
627
0
    }
628
790k
    case Type::DCT64X32: {
629
790k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
790k
                                 scratch_space);
631
790k
      break;
632
0
    }
633
552k
    case Type::DCT32X64: {
634
552k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
552k
                                 scratch_space);
636
552k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
221M
  }
669
221M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
7.90M
                                          float* JXL_RESTRICT scratch_space) {
462
7.90M
  using Type = AcStrategyType;
463
7.90M
  switch (strategy) {
464
1.04M
    case Type::IDENTITY: {
465
3.13M
      for (size_t y = 0; y < 2; y++) {
466
6.27M
        for (size_t x = 0; x < 2; x++) {
467
4.18M
          float block_dc = 0;
468
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
83.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
66.8M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
66.8M
            }
472
16.7M
          }
473
4.18M
          block_dc *= 1.0f / 16;
474
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
83.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
66.8M
              if (ix == 1 && iy == 1) continue;
477
62.7M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
62.7M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
62.7M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
62.7M
            }
481
16.7M
          }
482
4.18M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.18M
          coefficients[y * 8 + x] = block_dc;
484
4.18M
        }
485
2.09M
      }
486
1.04M
      float block00 = coefficients[0];
487
1.04M
      float block01 = coefficients[1];
488
1.04M
      float block10 = coefficients[8];
489
1.04M
      float block11 = coefficients[9];
490
1.04M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.04M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.04M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.04M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.04M
      break;
495
0
    }
496
449k
    case Type::DCT8X4: {
497
1.34M
      for (size_t x = 0; x < 2; x++) {
498
899k
        HWY_ALIGN float block[4 * 8];
499
899k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
899k
                                 scratch_space);
501
4.49M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.3M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.7M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.7M
          }
506
3.59M
        }
507
899k
      }
508
449k
      float block0 = coefficients[0];
509
449k
      float block1 = coefficients[8];
510
449k
      coefficients[0] = (block0 + block1) * 0.5f;
511
449k
      coefficients[8] = (block0 - block1) * 0.5f;
512
449k
      break;
513
0
    }
514
293k
    case Type::DCT4X8: {
515
879k
      for (size_t y = 0; y < 2; y++) {
516
586k
        HWY_ALIGN float block[4 * 8];
517
586k
        ComputeScaledDCT<4, 8>()(
518
586k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
586k
            scratch_space);
520
2.93M
        for (size_t iy = 0; iy < 4; iy++) {
521
21.1M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.7M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.7M
          }
524
2.34M
        }
525
586k
      }
526
293k
      float block0 = coefficients[0];
527
293k
      float block1 = coefficients[8];
528
293k
      coefficients[0] = (block0 + block1) * 0.5f;
529
293k
      coefficients[8] = (block0 - block1) * 0.5f;
530
293k
      break;
531
0
    }
532
2.30k
    case Type::DCT4X4: {
533
6.91k
      for (size_t y = 0; y < 2; y++) {
534
13.8k
        for (size_t x = 0; x < 2; x++) {
535
9.21k
          HWY_ALIGN float block[4 * 4];
536
9.21k
          ComputeScaledDCT<4, 4>()(
537
9.21k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.21k
              block, scratch_space);
539
46.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
184k
            for (size_t ix = 0; ix < 4; ix++) {
541
147k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
147k
            }
543
36.8k
          }
544
9.21k
        }
545
4.60k
      }
546
2.30k
      float block00 = coefficients[0];
547
2.30k
      float block01 = coefficients[1];
548
2.30k
      float block10 = coefficients[8];
549
2.30k
      float block11 = coefficients[9];
550
2.30k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.30k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.30k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.30k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.30k
      break;
555
0
    }
556
1.03M
    case Type::DCT2X2: {
557
1.03M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.03M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.03M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.03M
      break;
561
0
    }
562
178k
    case Type::DCT16X16: {
563
178k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
178k
                                 scratch_space);
565
178k
      break;
566
0
    }
567
293k
    case Type::DCT16X8: {
568
293k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
293k
                                scratch_space);
570
293k
      break;
571
0
    }
572
394k
    case Type::DCT8X16: {
573
394k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
394k
                                scratch_space);
575
394k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
77.5k
    case Type::DCT32X16: {
588
77.5k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
77.5k
                                 scratch_space);
590
77.5k
      break;
591
0
    }
592
121k
    case Type::DCT16X32: {
593
121k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
121k
                                 scratch_space);
595
121k
      break;
596
0
    }
597
148k
    case Type::DCT32X32: {
598
148k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
148k
                                 scratch_space);
600
148k
      break;
601
0
    }
602
2.70M
    case Type::DCT: {
603
2.70M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
2.70M
                               scratch_space);
605
2.70M
      break;
606
0
    }
607
247k
    case Type::AFV0: {
608
247k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
247k
      break;
610
0
    }
611
364k
    case Type::AFV1: {
612
364k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
364k
      break;
614
0
    }
615
226k
    case Type::AFV2: {
616
226k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
226k
      break;
618
0
    }
619
270k
    case Type::AFV3: {
620
270k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
270k
      break;
622
0
    }
623
30.7k
    case Type::DCT64X64: {
624
30.7k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
30.7k
                                 scratch_space);
626
30.7k
      break;
627
0
    }
628
5.78k
    case Type::DCT64X32: {
629
5.78k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
5.78k
                                 scratch_space);
631
5.78k
      break;
632
0
    }
633
9.74k
    case Type::DCT32X64: {
634
9.74k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
9.74k
                                 scratch_space);
636
9.74k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
7.90M
  }
669
7.90M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
190M
                                          float* JXL_RESTRICT scratch_space) {
462
190M
  using Type = AcStrategyType;
463
190M
  switch (strategy) {
464
15.1M
    case Type::IDENTITY: {
465
45.4M
      for (size_t y = 0; y < 2; y++) {
466
90.9M
        for (size_t x = 0; x < 2; x++) {
467
60.6M
          float block_dc = 0;
468
303M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
470
970M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
970M
            }
472
242M
          }
473
60.6M
          block_dc *= 1.0f / 16;
474
303M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
476
970M
              if (ix == 1 && iy == 1) continue;
477
909M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
909M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
909M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
909M
            }
481
242M
          }
482
60.6M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
60.6M
          coefficients[y * 8 + x] = block_dc;
484
60.6M
        }
485
30.3M
      }
486
15.1M
      float block00 = coefficients[0];
487
15.1M
      float block01 = coefficients[1];
488
15.1M
      float block10 = coefficients[8];
489
15.1M
      float block11 = coefficients[9];
490
15.1M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
15.1M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
15.1M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
15.1M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
15.1M
      break;
495
0
    }
496
15.1M
    case Type::DCT8X4: {
497
45.4M
      for (size_t x = 0; x < 2; x++) {
498
30.3M
        HWY_ALIGN float block[4 * 8];
499
30.3M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
30.3M
                                 scratch_space);
501
151M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.09G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
970M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
970M
          }
506
121M
        }
507
30.3M
      }
508
15.1M
      float block0 = coefficients[0];
509
15.1M
      float block1 = coefficients[8];
510
15.1M
      coefficients[0] = (block0 + block1) * 0.5f;
511
15.1M
      coefficients[8] = (block0 - block1) * 0.5f;
512
15.1M
      break;
513
0
    }
514
15.1M
    case Type::DCT4X8: {
515
45.4M
      for (size_t y = 0; y < 2; y++) {
516
30.3M
        HWY_ALIGN float block[4 * 8];
517
30.3M
        ComputeScaledDCT<4, 8>()(
518
30.3M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
30.3M
            scratch_space);
520
151M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.09G
          for (size_t ix = 0; ix < 8; ix++) {
522
970M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
970M
          }
524
121M
        }
525
30.3M
      }
526
15.1M
      float block0 = coefficients[0];
527
15.1M
      float block1 = coefficients[8];
528
15.1M
      coefficients[0] = (block0 + block1) * 0.5f;
529
15.1M
      coefficients[8] = (block0 - block1) * 0.5f;
530
15.1M
      break;
531
0
    }
532
15.1M
    case Type::DCT4X4: {
533
45.4M
      for (size_t y = 0; y < 2; y++) {
534
90.9M
        for (size_t x = 0; x < 2; x++) {
535
60.6M
          HWY_ALIGN float block[4 * 4];
536
60.6M
          ComputeScaledDCT<4, 4>()(
537
60.6M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
60.6M
              block, scratch_space);
539
303M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.21G
            for (size_t ix = 0; ix < 4; ix++) {
541
970M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
970M
            }
543
242M
          }
544
60.6M
        }
545
30.3M
      }
546
15.1M
      float block00 = coefficients[0];
547
15.1M
      float block01 = coefficients[1];
548
15.1M
      float block10 = coefficients[8];
549
15.1M
      float block11 = coefficients[9];
550
15.1M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
15.1M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
15.1M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
15.1M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
15.1M
      break;
555
0
    }
556
15.1M
    case Type::DCT2X2: {
557
15.1M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
15.1M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
15.1M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
15.1M
      break;
561
0
    }
562
6.38M
    case Type::DCT16X16: {
563
6.38M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
6.38M
                                 scratch_space);
565
6.38M
      break;
566
0
    }
567
12.4M
    case Type::DCT16X8: {
568
12.4M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
12.4M
                                scratch_space);
570
12.4M
      break;
571
0
    }
572
12.5M
    case Type::DCT8X16: {
573
12.5M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
12.5M
                                scratch_space);
575
12.5M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.46M
    case Type::DCT32X16: {
588
2.46M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.46M
                                 scratch_space);
590
2.46M
      break;
591
0
    }
592
2.48M
    case Type::DCT16X32: {
593
2.48M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.48M
                                 scratch_space);
595
2.48M
      break;
596
0
    }
597
1.26M
    case Type::DCT32X32: {
598
1.26M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.26M
                                 scratch_space);
600
1.26M
      break;
601
0
    }
602
15.1M
    case Type::DCT: {
603
15.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
15.1M
                               scratch_space);
605
15.1M
      break;
606
0
    }
607
15.1M
    case Type::AFV0: {
608
15.1M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
15.1M
      break;
610
0
    }
611
15.1M
    case Type::AFV1: {
612
15.1M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
15.1M
      break;
614
0
    }
615
15.1M
    case Type::AFV2: {
616
15.1M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
15.1M
      break;
618
0
    }
619
15.1M
    case Type::AFV3: {
620
15.1M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
15.1M
      break;
622
0
    }
623
205k
    case Type::DCT64X64: {
624
205k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
205k
                                 scratch_space);
626
205k
      break;
627
0
    }
628
778k
    case Type::DCT64X32: {
629
778k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
778k
                                 scratch_space);
631
778k
      break;
632
0
    }
633
533k
    case Type::DCT32X64: {
634
533k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
533k
                                 scratch_space);
636
533k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
190M
  }
669
190M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
23.0M
                                          float* JXL_RESTRICT scratch_space) {
462
23.0M
  using Type = AcStrategyType;
463
23.0M
  switch (strategy) {
464
1.04M
    case Type::IDENTITY: {
465
3.13M
      for (size_t y = 0; y < 2; y++) {
466
6.27M
        for (size_t x = 0; x < 2; x++) {
467
4.18M
          float block_dc = 0;
468
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
83.6M
            for (size_t ix = 0; ix < 4; ix++) {
470
66.8M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
66.8M
            }
472
16.7M
          }
473
4.18M
          block_dc *= 1.0f / 16;
474
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
83.6M
            for (size_t ix = 0; ix < 4; ix++) {
476
66.8M
              if (ix == 1 && iy == 1) continue;
477
62.7M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
62.7M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
62.7M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
62.7M
            }
481
16.7M
          }
482
4.18M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.18M
          coefficients[y * 8 + x] = block_dc;
484
4.18M
        }
485
2.09M
      }
486
1.04M
      float block00 = coefficients[0];
487
1.04M
      float block01 = coefficients[1];
488
1.04M
      float block10 = coefficients[8];
489
1.04M
      float block11 = coefficients[9];
490
1.04M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.04M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.04M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.04M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.04M
      break;
495
0
    }
496
449k
    case Type::DCT8X4: {
497
1.34M
      for (size_t x = 0; x < 2; x++) {
498
899k
        HWY_ALIGN float block[4 * 8];
499
899k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
899k
                                 scratch_space);
501
4.49M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.3M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.7M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.7M
          }
506
3.59M
        }
507
899k
      }
508
449k
      float block0 = coefficients[0];
509
449k
      float block1 = coefficients[8];
510
449k
      coefficients[0] = (block0 + block1) * 0.5f;
511
449k
      coefficients[8] = (block0 - block1) * 0.5f;
512
449k
      break;
513
0
    }
514
293k
    case Type::DCT4X8: {
515
879k
      for (size_t y = 0; y < 2; y++) {
516
586k
        HWY_ALIGN float block[4 * 8];
517
586k
        ComputeScaledDCT<4, 8>()(
518
586k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
586k
            scratch_space);
520
2.93M
        for (size_t iy = 0; iy < 4; iy++) {
521
21.1M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.7M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.7M
          }
524
2.34M
        }
525
586k
      }
526
293k
      float block0 = coefficients[0];
527
293k
      float block1 = coefficients[8];
528
293k
      coefficients[0] = (block0 + block1) * 0.5f;
529
293k
      coefficients[8] = (block0 - block1) * 0.5f;
530
293k
      break;
531
0
    }
532
2.30k
    case Type::DCT4X4: {
533
6.91k
      for (size_t y = 0; y < 2; y++) {
534
13.8k
        for (size_t x = 0; x < 2; x++) {
535
9.21k
          HWY_ALIGN float block[4 * 4];
536
9.21k
          ComputeScaledDCT<4, 4>()(
537
9.21k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.21k
              block, scratch_space);
539
46.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
184k
            for (size_t ix = 0; ix < 4; ix++) {
541
147k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
147k
            }
543
36.8k
          }
544
9.21k
        }
545
4.60k
      }
546
2.30k
      float block00 = coefficients[0];
547
2.30k
      float block01 = coefficients[1];
548
2.30k
      float block10 = coefficients[8];
549
2.30k
      float block11 = coefficients[9];
550
2.30k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.30k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.30k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.30k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.30k
      break;
555
0
    }
556
1.03M
    case Type::DCT2X2: {
557
1.03M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.03M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.03M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.03M
      break;
561
0
    }
562
178k
    case Type::DCT16X16: {
563
178k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
178k
                                 scratch_space);
565
178k
      break;
566
0
    }
567
293k
    case Type::DCT16X8: {
568
293k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
293k
                                scratch_space);
570
293k
      break;
571
0
    }
572
394k
    case Type::DCT8X16: {
573
394k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
394k
                                scratch_space);
575
394k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
77.5k
    case Type::DCT32X16: {
588
77.5k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
77.5k
                                 scratch_space);
590
77.5k
      break;
591
0
    }
592
121k
    case Type::DCT16X32: {
593
121k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
121k
                                 scratch_space);
595
121k
      break;
596
0
    }
597
148k
    case Type::DCT32X32: {
598
148k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
148k
                                 scratch_space);
600
148k
      break;
601
0
    }
602
17.8M
    case Type::DCT: {
603
17.8M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
17.8M
                               scratch_space);
605
17.8M
      break;
606
0
    }
607
247k
    case Type::AFV0: {
608
247k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
247k
      break;
610
0
    }
611
364k
    case Type::AFV1: {
612
364k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
364k
      break;
614
0
    }
615
226k
    case Type::AFV2: {
616
226k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
226k
      break;
618
0
    }
619
270k
    case Type::AFV3: {
620
270k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
270k
      break;
622
0
    }
623
30.7k
    case Type::DCT64X64: {
624
30.7k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
30.7k
                                 scratch_space);
626
30.7k
      break;
627
0
    }
628
5.78k
    case Type::DCT64X32: {
629
5.78k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
5.78k
                                 scratch_space);
631
5.78k
      break;
632
0
    }
633
9.74k
    case Type::DCT32X64: {
634
9.74k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
9.74k
                                 scratch_space);
636
9.74k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
23.0M
  }
669
23.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
30.9M
                                              float* scratch_space) {
676
30.9M
  using Type = AcStrategyType;
677
30.9M
  switch (strategy) {
678
587k
    case Type::DCT16X8: {
679
587k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
587k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
587k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
587k
      break;
683
0
    }
684
788k
    case Type::DCT8X16: {
685
788k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
788k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
788k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
788k
      break;
689
0
    }
690
356k
    case Type::DCT16X16: {
691
356k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
356k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
356k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
356k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
155k
    case Type::DCT32X16: {
709
155k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
155k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
155k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
155k
      break;
713
0
    }
714
242k
    case Type::DCT16X32: {
715
242k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
242k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
242k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
242k
      break;
719
0
    }
720
296k
    case Type::DCT32X32: {
721
296k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
296k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
296k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
296k
      break;
725
0
    }
726
11.5k
    case Type::DCT64X32: {
727
11.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
11.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
11.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
11.5k
      break;
731
0
    }
732
19.4k
    case Type::DCT32X64: {
733
19.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
19.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
19.4k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
19.4k
      break;
737
0
    }
738
61.5k
    case Type::DCT64X64: {
739
61.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
61.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
61.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
61.5k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
20.5M
    case Type::DCT:
787
22.6M
    case Type::DCT2X2:
788
22.6M
    case Type::DCT4X4:
789
23.2M
    case Type::DCT4X8:
790
24.1M
    case Type::DCT8X4:
791
24.6M
    case Type::AFV0:
792
25.3M
    case Type::AFV1:
793
25.8M
    case Type::AFV2:
794
26.3M
    case Type::AFV3:
795
28.4M
    case Type::IDENTITY:
796
28.4M
      dc[0] = block[0];
797
28.4M
      break;
798
30.9M
  }
799
30.9M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
7.90M
                                              float* scratch_space) {
676
7.90M
  using Type = AcStrategyType;
677
7.90M
  switch (strategy) {
678
293k
    case Type::DCT16X8: {
679
293k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
293k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
293k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
293k
      break;
683
0
    }
684
394k
    case Type::DCT8X16: {
685
394k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
394k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
394k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
394k
      break;
689
0
    }
690
178k
    case Type::DCT16X16: {
691
178k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
178k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
178k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
178k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
77.5k
    case Type::DCT32X16: {
709
77.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
77.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
77.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
77.5k
      break;
713
0
    }
714
121k
    case Type::DCT16X32: {
715
121k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
121k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
121k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
121k
      break;
719
0
    }
720
148k
    case Type::DCT32X32: {
721
148k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
148k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
148k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
148k
      break;
725
0
    }
726
5.78k
    case Type::DCT64X32: {
727
5.78k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
5.78k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
5.78k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
5.78k
      break;
731
0
    }
732
9.74k
    case Type::DCT32X64: {
733
9.74k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
9.74k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
9.74k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
9.74k
      break;
737
0
    }
738
30.7k
    case Type::DCT64X64: {
739
30.7k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
30.7k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
30.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
30.7k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
2.70M
    case Type::DCT:
787
3.74M
    case Type::DCT2X2:
788
3.74M
    case Type::DCT4X4:
789
4.03M
    case Type::DCT4X8:
790
4.48M
    case Type::DCT8X4:
791
4.73M
    case Type::AFV0:
792
5.09M
    case Type::AFV1:
793
5.32M
    case Type::AFV2:
794
5.59M
    case Type::AFV3:
795
6.64M
    case Type::IDENTITY:
796
6.64M
      dc[0] = block[0];
797
6.64M
      break;
798
7.90M
  }
799
7.90M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
23.0M
                                              float* scratch_space) {
676
23.0M
  using Type = AcStrategyType;
677
23.0M
  switch (strategy) {
678
293k
    case Type::DCT16X8: {
679
293k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
293k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
293k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
293k
      break;
683
0
    }
684
394k
    case Type::DCT8X16: {
685
394k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
394k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
394k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
394k
      break;
689
0
    }
690
178k
    case Type::DCT16X16: {
691
178k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
178k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
178k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
178k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
77.5k
    case Type::DCT32X16: {
709
77.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
77.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
77.5k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
77.5k
      break;
713
0
    }
714
121k
    case Type::DCT16X32: {
715
121k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
121k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
121k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
121k
      break;
719
0
    }
720
148k
    case Type::DCT32X32: {
721
148k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
148k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
148k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
148k
      break;
725
0
    }
726
5.78k
    case Type::DCT64X32: {
727
5.78k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
5.78k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
5.78k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
5.78k
      break;
731
0
    }
732
9.74k
    case Type::DCT32X64: {
733
9.74k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
9.74k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
9.74k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
9.74k
      break;
737
0
    }
738
30.7k
    case Type::DCT64X64: {
739
30.7k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
30.7k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
30.7k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
30.7k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
17.8M
    case Type::DCT:
787
18.9M
    case Type::DCT2X2:
788
18.9M
    case Type::DCT4X4:
789
19.1M
    case Type::DCT4X8:
790
19.6M
    case Type::DCT8X4:
791
19.8M
    case Type::AFV0:
792
20.2M
    case Type::AFV1:
793
20.4M
    case Type::AFV2:
794
20.7M
    case Type::AFV3:
795
21.8M
    case Type::IDENTITY:
796
21.8M
      dc[0] = block[0];
797
21.8M
      break;
798
23.0M
  }
799
23.0M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_