Coverage Report

Created: 2026-05-24 07:45

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.45M
                                   const size_t output_stride, float* scratch) {
40
2.45M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.45M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.45M
  float* block = scratch;
43
2.45M
  if (ROWS < COLS) {
44
2.25M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
5.07M
      for (size_t x = 0; x < LF_COLS; x++) {
46
3.81M
        block[y * COLS + x] = input[y * input_stride + x] *
47
3.81M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
3.81M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
3.81M
      }
50
1.26M
    }
51
1.45M
  } else {
52
5.28M
    for (size_t y = 0; y < LF_COLS; y++) {
53
20.4M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
16.6M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
16.6M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
16.6M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
16.6M
      }
58
3.83M
    }
59
1.45M
  }
60
61
2.45M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.45M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.45M
                                  scratch_space);
64
2.45M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
257k
                                   const size_t output_stride, float* scratch) {
40
257k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
257k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
257k
  float* block = scratch;
43
257k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
257k
  } else {
52
514k
    for (size_t y = 0; y < LF_COLS; y++) {
53
771k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
514k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
514k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
514k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
514k
      }
58
257k
    }
59
257k
  }
60
61
257k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
257k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
257k
                                  scratch_space);
64
257k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
382k
                                   const size_t output_stride, float* scratch) {
40
382k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
382k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
382k
  float* block = scratch;
43
382k
  if (ROWS < COLS) {
44
764k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.14M
      for (size_t x = 0; x < LF_COLS; x++) {
46
764k
        block[y * COLS + x] = input[y * input_stride + x] *
47
764k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
764k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
764k
      }
50
382k
    }
51
382k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
382k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
382k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
382k
                                  scratch_space);
64
382k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
153k
                                   const size_t output_stride, float* scratch) {
40
153k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
153k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
153k
  float* block = scratch;
43
153k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
153k
  } else {
52
459k
    for (size_t y = 0; y < LF_COLS; y++) {
53
918k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
612k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
612k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
612k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
612k
      }
58
306k
    }
59
153k
  }
60
61
153k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
153k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
153k
                                  scratch_space);
64
153k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
65.4k
                                   const size_t output_stride, float* scratch) {
40
65.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
65.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
65.4k
  float* block = scratch;
43
65.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
65.4k
  } else {
52
196k
    for (size_t y = 0; y < LF_COLS; y++) {
53
654k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
523k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
523k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
523k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
523k
      }
58
130k
    }
59
65.4k
  }
60
61
65.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
65.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
65.4k
                                  scratch_space);
64
65.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
106k
                                   const size_t output_stride, float* scratch) {
40
106k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
106k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
106k
  float* block = scratch;
43
106k
  if (ROWS < COLS) {
44
318k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.06M
      for (size_t x = 0; x < LF_COLS; x++) {
46
849k
        block[y * COLS + x] = input[y * input_stride + x] *
47
849k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
849k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
849k
      }
50
212k
    }
51
106k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
106k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
106k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
106k
                                  scratch_space);
64
106k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
194k
                                   const size_t output_stride, float* scratch) {
40
194k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
194k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
194k
  float* block = scratch;
43
194k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
194k
  } else {
52
972k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.88M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.11M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.11M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.11M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.11M
      }
58
777k
    }
59
194k
  }
60
61
194k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
194k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
194k
                                  scratch_space);
64
194k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.48k
                                   const size_t output_stride, float* scratch) {
40
6.48k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.48k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.48k
  float* block = scratch;
43
6.48k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.48k
  } else {
52
32.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
233k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
207k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
207k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
207k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
207k
      }
58
25.9k
    }
59
6.48k
  }
60
61
6.48k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.48k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.48k
                                  scratch_space);
64
6.48k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
9.15k
                                   const size_t output_stride, float* scratch) {
40
9.15k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
9.15k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
9.15k
  float* block = scratch;
43
9.15k
  if (ROWS < COLS) {
44
45.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
329k
      for (size_t x = 0; x < LF_COLS; x++) {
46
293k
        block[y * COLS + x] = input[y * input_stride + x] *
47
293k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
293k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
293k
      }
50
36.6k
    }
51
9.15k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
9.15k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
9.15k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
9.15k
                                  scratch_space);
64
9.15k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
52.2k
                                   const size_t output_stride, float* scratch) {
40
52.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
52.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
52.2k
  float* block = scratch;
43
52.2k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
52.2k
  } else {
52
469k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.75M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.34M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.34M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.34M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.34M
      }
58
417k
    }
59
52.2k
  }
60
61
52.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
52.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
52.2k
                                  scratch_space);
64
52.2k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
257k
                                   const size_t output_stride, float* scratch) {
40
257k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
257k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
257k
  float* block = scratch;
43
257k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
257k
  } else {
52
514k
    for (size_t y = 0; y < LF_COLS; y++) {
53
771k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
514k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
514k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
514k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
514k
      }
58
257k
    }
59
257k
  }
60
61
257k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
257k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
257k
                                  scratch_space);
64
257k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
382k
                                   const size_t output_stride, float* scratch) {
40
382k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
382k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
382k
  float* block = scratch;
43
382k
  if (ROWS < COLS) {
44
764k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.14M
      for (size_t x = 0; x < LF_COLS; x++) {
46
764k
        block[y * COLS + x] = input[y * input_stride + x] *
47
764k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
764k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
764k
      }
50
382k
    }
51
382k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
382k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
382k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
382k
                                  scratch_space);
64
382k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
153k
                                   const size_t output_stride, float* scratch) {
40
153k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
153k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
153k
  float* block = scratch;
43
153k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
153k
  } else {
52
459k
    for (size_t y = 0; y < LF_COLS; y++) {
53
918k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
612k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
612k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
612k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
612k
      }
58
306k
    }
59
153k
  }
60
61
153k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
153k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
153k
                                  scratch_space);
64
153k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
65.4k
                                   const size_t output_stride, float* scratch) {
40
65.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
65.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
65.4k
  float* block = scratch;
43
65.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
65.4k
  } else {
52
196k
    for (size_t y = 0; y < LF_COLS; y++) {
53
654k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
523k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
523k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
523k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
523k
      }
58
130k
    }
59
65.4k
  }
60
61
65.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
65.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
65.4k
                                  scratch_space);
64
65.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
106k
                                   const size_t output_stride, float* scratch) {
40
106k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
106k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
106k
  float* block = scratch;
43
106k
  if (ROWS < COLS) {
44
318k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.06M
      for (size_t x = 0; x < LF_COLS; x++) {
46
849k
        block[y * COLS + x] = input[y * input_stride + x] *
47
849k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
849k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
849k
      }
50
212k
    }
51
106k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
106k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
106k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
106k
                                  scratch_space);
64
106k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
194k
                                   const size_t output_stride, float* scratch) {
40
194k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
194k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
194k
  float* block = scratch;
43
194k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
194k
  } else {
52
972k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.88M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.11M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.11M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.11M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.11M
      }
58
777k
    }
59
194k
  }
60
61
194k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
194k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
194k
                                  scratch_space);
64
194k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
6.48k
                                   const size_t output_stride, float* scratch) {
40
6.48k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
6.48k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
6.48k
  float* block = scratch;
43
6.48k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
6.48k
  } else {
52
32.4k
    for (size_t y = 0; y < LF_COLS; y++) {
53
233k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
207k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
207k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
207k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
207k
      }
58
25.9k
    }
59
6.48k
  }
60
61
6.48k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
6.48k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
6.48k
                                  scratch_space);
64
6.48k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
9.15k
                                   const size_t output_stride, float* scratch) {
40
9.15k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
9.15k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
9.15k
  float* block = scratch;
43
9.15k
  if (ROWS < COLS) {
44
45.7k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
329k
      for (size_t x = 0; x < LF_COLS; x++) {
46
293k
        block[y * COLS + x] = input[y * input_stride + x] *
47
293k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
293k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
293k
      }
50
36.6k
    }
51
9.15k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
9.15k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
9.15k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
9.15k
                                  scratch_space);
64
9.15k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
52.2k
                                   const size_t output_stride, float* scratch) {
40
52.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
52.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
52.2k
  float* block = scratch;
43
52.2k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
52.2k
  } else {
52
469k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.75M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.34M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.34M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.34M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.34M
      }
58
417k
    }
59
52.2k
  }
60
61
52.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
52.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
52.2k
                                  scratch_space);
64
52.2k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
58.1M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
58.1M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
58.1M
  static_assert(S % 2 == 0, "S should be even");
70
58.1M
  float temp[kDCTBlockSize];
71
58.1M
  constexpr size_t num_2x2 = S / 2;
72
193M
  for (size_t y = 0; y < num_2x2; y++) {
73
543M
    for (size_t x = 0; x < num_2x2; x++) {
74
407M
      float c00 = block[y * 2 * stride + x * 2];
75
407M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
407M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
407M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
407M
      float r00 = c00 + c01 + c10 + c11;
79
407M
      float r01 = c00 + c01 - c10 - c11;
80
407M
      float r10 = c00 - c01 + c10 - c11;
81
407M
      float r11 = c00 - c01 - c10 + c11;
82
407M
      r00 *= 0.25f;
83
407M
      r01 *= 0.25f;
84
407M
      r10 *= 0.25f;
85
407M
      r11 *= 0.25f;
86
407M
      temp[y * kBlockDim + x] = r00;
87
407M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
407M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
407M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
407M
    }
91
135M
  }
92
329M
  for (size_t y = 0; y < S; y++) {
93
1.90G
    for (size_t x = 0; x < S; x++) {
94
1.62G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.62G
    }
96
271M
  }
97
58.1M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
5.23M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.7M
      float c00 = block[y * 2 * stride + x * 2];
75
16.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.7M
      float r00 = c00 + c01 + c10 + c11;
79
16.7M
      float r01 = c00 + c01 - c10 - c11;
80
16.7M
      float r10 = c00 - c01 + c10 - c11;
81
16.7M
      float r11 = c00 - c01 - c10 + c11;
82
16.7M
      r00 *= 0.25f;
83
16.7M
      r01 *= 0.25f;
84
16.7M
      r10 *= 0.25f;
85
16.7M
      r11 *= 0.25f;
86
16.7M
      temp[y * kBlockDim + x] = r00;
87
16.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.7M
    }
91
4.18M
  }
92
9.42M
  for (size_t y = 0; y < S; y++) {
93
75.3M
    for (size_t x = 0; x < S; x++) {
94
66.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
66.9M
    }
96
8.37M
  }
97
1.04M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
3.14M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.28M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.18M
      float c00 = block[y * 2 * stride + x * 2];
75
4.18M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.18M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.18M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.18M
      float r00 = c00 + c01 + c10 + c11;
79
4.18M
      float r01 = c00 + c01 - c10 - c11;
80
4.18M
      float r10 = c00 - c01 + c10 - c11;
81
4.18M
      float r11 = c00 - c01 - c10 + c11;
82
4.18M
      r00 *= 0.25f;
83
4.18M
      r01 *= 0.25f;
84
4.18M
      r10 *= 0.25f;
85
4.18M
      r11 *= 0.25f;
86
4.18M
      temp[y * kBlockDim + x] = r00;
87
4.18M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.18M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.18M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.18M
    }
91
2.09M
  }
92
5.23M
  for (size_t y = 0; y < S; y++) {
93
20.9M
    for (size_t x = 0; x < S; x++) {
94
16.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.7M
    }
96
4.18M
  }
97
1.04M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
2.09M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.09M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.04M
      float c00 = block[y * 2 * stride + x * 2];
75
1.04M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.04M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.04M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.04M
      float r00 = c00 + c01 + c10 + c11;
79
1.04M
      float r01 = c00 + c01 - c10 - c11;
80
1.04M
      float r10 = c00 - c01 + c10 - c11;
81
1.04M
      float r11 = c00 - c01 - c10 + c11;
82
1.04M
      r00 *= 0.25f;
83
1.04M
      r01 *= 0.25f;
84
1.04M
      r10 *= 0.25f;
85
1.04M
      r11 *= 0.25f;
86
1.04M
      temp[y * kBlockDim + x] = r00;
87
1.04M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.04M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.04M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.04M
    }
91
1.04M
  }
92
3.14M
  for (size_t y = 0; y < S; y++) {
93
6.28M
    for (size_t x = 0; x < S; x++) {
94
4.18M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.18M
    }
96
2.09M
  }
97
1.04M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.2M
  static_assert(S % 2 == 0, "S should be even");
70
17.2M
  float temp[kDCTBlockSize];
71
17.2M
  constexpr size_t num_2x2 = S / 2;
72
86.4M
  for (size_t y = 0; y < num_2x2; y++) {
73
345M
    for (size_t x = 0; x < num_2x2; x++) {
74
276M
      float c00 = block[y * 2 * stride + x * 2];
75
276M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
276M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
276M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
276M
      float r00 = c00 + c01 + c10 + c11;
79
276M
      float r01 = c00 + c01 - c10 - c11;
80
276M
      float r10 = c00 - c01 + c10 - c11;
81
276M
      float r11 = c00 - c01 - c10 + c11;
82
276M
      r00 *= 0.25f;
83
276M
      r01 *= 0.25f;
84
276M
      r10 *= 0.25f;
85
276M
      r11 *= 0.25f;
86
276M
      temp[y * kBlockDim + x] = r00;
87
276M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
276M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
276M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
276M
    }
91
69.1M
  }
92
155M
  for (size_t y = 0; y < S; y++) {
93
1.24G
    for (size_t x = 0; x < S; x++) {
94
1.10G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.10G
    }
96
138M
  }
97
17.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.2M
  static_assert(S % 2 == 0, "S should be even");
70
17.2M
  float temp[kDCTBlockSize];
71
17.2M
  constexpr size_t num_2x2 = S / 2;
72
51.8M
  for (size_t y = 0; y < num_2x2; y++) {
73
103M
    for (size_t x = 0; x < num_2x2; x++) {
74
69.1M
      float c00 = block[y * 2 * stride + x * 2];
75
69.1M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
69.1M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
69.1M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
69.1M
      float r00 = c00 + c01 + c10 + c11;
79
69.1M
      float r01 = c00 + c01 - c10 - c11;
80
69.1M
      float r10 = c00 - c01 + c10 - c11;
81
69.1M
      float r11 = c00 - c01 - c10 + c11;
82
69.1M
      r00 *= 0.25f;
83
69.1M
      r01 *= 0.25f;
84
69.1M
      r10 *= 0.25f;
85
69.1M
      r11 *= 0.25f;
86
69.1M
      temp[y * kBlockDim + x] = r00;
87
69.1M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
69.1M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
69.1M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
69.1M
    }
91
34.5M
  }
92
86.4M
  for (size_t y = 0; y < S; y++) {
93
345M
    for (size_t x = 0; x < S; x++) {
94
276M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
276M
    }
96
69.1M
  }
97
17.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
17.2M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
17.2M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
17.2M
  static_assert(S % 2 == 0, "S should be even");
70
17.2M
  float temp[kDCTBlockSize];
71
17.2M
  constexpr size_t num_2x2 = S / 2;
72
34.5M
  for (size_t y = 0; y < num_2x2; y++) {
73
34.5M
    for (size_t x = 0; x < num_2x2; x++) {
74
17.2M
      float c00 = block[y * 2 * stride + x * 2];
75
17.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
17.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
17.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
17.2M
      float r00 = c00 + c01 + c10 + c11;
79
17.2M
      float r01 = c00 + c01 - c10 - c11;
80
17.2M
      float r10 = c00 - c01 + c10 - c11;
81
17.2M
      float r11 = c00 - c01 - c10 + c11;
82
17.2M
      r00 *= 0.25f;
83
17.2M
      r01 *= 0.25f;
84
17.2M
      r10 *= 0.25f;
85
17.2M
      r11 *= 0.25f;
86
17.2M
      temp[y * kBlockDim + x] = r00;
87
17.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
17.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
17.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
17.2M
    }
91
17.2M
  }
92
51.8M
  for (size_t y = 0; y < S; y++) {
93
103M
    for (size_t x = 0; x < S; x++) {
94
69.1M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
69.1M
    }
96
34.5M
  }
97
17.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
5.23M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.9M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.7M
      float c00 = block[y * 2 * stride + x * 2];
75
16.7M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.7M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.7M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.7M
      float r00 = c00 + c01 + c10 + c11;
79
16.7M
      float r01 = c00 + c01 - c10 - c11;
80
16.7M
      float r10 = c00 - c01 + c10 - c11;
81
16.7M
      float r11 = c00 - c01 - c10 + c11;
82
16.7M
      r00 *= 0.25f;
83
16.7M
      r01 *= 0.25f;
84
16.7M
      r10 *= 0.25f;
85
16.7M
      r11 *= 0.25f;
86
16.7M
      temp[y * kBlockDim + x] = r00;
87
16.7M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.7M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.7M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.7M
    }
91
4.18M
  }
92
9.42M
  for (size_t y = 0; y < S; y++) {
93
75.3M
    for (size_t x = 0; x < S; x++) {
94
66.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
66.9M
    }
96
8.37M
  }
97
1.04M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
3.14M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.28M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.18M
      float c00 = block[y * 2 * stride + x * 2];
75
4.18M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.18M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.18M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.18M
      float r00 = c00 + c01 + c10 + c11;
79
4.18M
      float r01 = c00 + c01 - c10 - c11;
80
4.18M
      float r10 = c00 - c01 + c10 - c11;
81
4.18M
      float r11 = c00 - c01 - c10 + c11;
82
4.18M
      r00 *= 0.25f;
83
4.18M
      r01 *= 0.25f;
84
4.18M
      r10 *= 0.25f;
85
4.18M
      r11 *= 0.25f;
86
4.18M
      temp[y * kBlockDim + x] = r00;
87
4.18M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.18M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.18M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.18M
    }
91
2.09M
  }
92
5.23M
  for (size_t y = 0; y < S; y++) {
93
20.9M
    for (size_t x = 0; x < S; x++) {
94
16.7M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.7M
    }
96
4.18M
  }
97
1.04M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.04M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.04M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.04M
  static_assert(S % 2 == 0, "S should be even");
70
1.04M
  float temp[kDCTBlockSize];
71
1.04M
  constexpr size_t num_2x2 = S / 2;
72
2.09M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.09M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.04M
      float c00 = block[y * 2 * stride + x * 2];
75
1.04M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.04M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.04M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.04M
      float r00 = c00 + c01 + c10 + c11;
79
1.04M
      float r01 = c00 + c01 - c10 - c11;
80
1.04M
      float r10 = c00 - c01 + c10 - c11;
81
1.04M
      float r11 = c00 - c01 - c10 + c11;
82
1.04M
      r00 *= 0.25f;
83
1.04M
      r01 *= 0.25f;
84
1.04M
      r10 *= 0.25f;
85
1.04M
      r11 *= 0.25f;
86
1.04M
      temp[y * kBlockDim + x] = r00;
87
1.04M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.04M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.04M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.04M
    }
91
1.04M
  }
92
3.14M
  for (size_t y = 0; y < S; y++) {
93
6.28M
    for (size_t x = 0; x < S; x++) {
94
4.18M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.18M
    }
96
2.09M
  }
97
1.04M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
72.3M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
72.3M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
72.3M
      {
102
72.3M
          0.2500000000000000,
103
72.3M
          0.8769029297991420f,
104
72.3M
          0.0000000000000000,
105
72.3M
          0.0000000000000000,
106
72.3M
          0.0000000000000000,
107
72.3M
          -0.4105377591765233f,
108
72.3M
          0.0000000000000000,
109
72.3M
          0.0000000000000000,
110
72.3M
          0.0000000000000000,
111
72.3M
          0.0000000000000000,
112
72.3M
          0.0000000000000000,
113
72.3M
          0.0000000000000000,
114
72.3M
          0.0000000000000000,
115
72.3M
          0.0000000000000000,
116
72.3M
          0.0000000000000000,
117
72.3M
          0.0000000000000000,
118
72.3M
      },
119
72.3M
      {
120
72.3M
          0.2500000000000000,
121
72.3M
          0.2206518106944235f,
122
72.3M
          0.0000000000000000,
123
72.3M
          0.0000000000000000,
124
72.3M
          -0.7071067811865474f,
125
72.3M
          0.6235485373547691f,
126
72.3M
          0.0000000000000000,
127
72.3M
          0.0000000000000000,
128
72.3M
          0.0000000000000000,
129
72.3M
          0.0000000000000000,
130
72.3M
          0.0000000000000000,
131
72.3M
          0.0000000000000000,
132
72.3M
          0.0000000000000000,
133
72.3M
          0.0000000000000000,
134
72.3M
          0.0000000000000000,
135
72.3M
          0.0000000000000000,
136
72.3M
      },
137
72.3M
      {
138
72.3M
          0.2500000000000000,
139
72.3M
          -0.1014005039375376f,
140
72.3M
          0.4067007583026075f,
141
72.3M
          -0.2125574805828875f,
142
72.3M
          0.0000000000000000,
143
72.3M
          -0.0643507165794627f,
144
72.3M
          -0.4517556589999482f,
145
72.3M
          -0.3046847507248690f,
146
72.3M
          0.3017929516615495f,
147
72.3M
          0.4082482904638627f,
148
72.3M
          0.1747866975480809f,
149
72.3M
          -0.2110560104933578f,
150
72.3M
          -0.1426608480880726f,
151
72.3M
          -0.1381354035075859f,
152
72.3M
          -0.1743760259965107f,
153
72.3M
          0.1135498731499434f,
154
72.3M
      },
155
72.3M
      {
156
72.3M
          0.2500000000000000,
157
72.3M
          -0.1014005039375375f,
158
72.3M
          0.4444481661973445f,
159
72.3M
          0.3085497062849767f,
160
72.3M
          0.0000000000000000f,
161
72.3M
          -0.0643507165794627f,
162
72.3M
          0.1585450355184006f,
163
72.3M
          0.5112616136591823f,
164
72.3M
          0.2579236279634118f,
165
72.3M
          0.0000000000000000,
166
72.3M
          0.0812611176717539f,
167
72.3M
          0.1856718091610980f,
168
72.3M
          -0.3416446842253372f,
169
72.3M
          0.3302282550303788f,
170
72.3M
          0.0702790691196284f,
171
72.3M
          -0.0741750459581035f,
172
72.3M
      },
173
72.3M
      {
174
72.3M
          0.2500000000000000,
175
72.3M
          0.2206518106944236f,
176
72.3M
          0.0000000000000000,
177
72.3M
          0.0000000000000000,
178
72.3M
          0.7071067811865476f,
179
72.3M
          0.6235485373547694f,
180
72.3M
          0.0000000000000000,
181
72.3M
          0.0000000000000000,
182
72.3M
          0.0000000000000000,
183
72.3M
          0.0000000000000000,
184
72.3M
          0.0000000000000000,
185
72.3M
          0.0000000000000000,
186
72.3M
          0.0000000000000000,
187
72.3M
          0.0000000000000000,
188
72.3M
          0.0000000000000000,
189
72.3M
          0.0000000000000000,
190
72.3M
      },
191
72.3M
      {
192
72.3M
          0.2500000000000000,
193
72.3M
          -0.1014005039375378f,
194
72.3M
          0.0000000000000000,
195
72.3M
          0.4706702258572536f,
196
72.3M
          0.0000000000000000,
197
72.3M
          -0.0643507165794628f,
198
72.3M
          -0.0403851516082220f,
199
72.3M
          0.0000000000000000,
200
72.3M
          0.1627234014286620f,
201
72.3M
          0.0000000000000000,
202
72.3M
          0.0000000000000000,
203
72.3M
          0.0000000000000000,
204
72.3M
          0.7367497537172237f,
205
72.3M
          0.0875511500058708f,
206
72.3M
          -0.2921026642334881f,
207
72.3M
          0.1940289303259434f,
208
72.3M
      },
209
72.3M
      {
210
72.3M
          0.2500000000000000,
211
72.3M
          -0.1014005039375377f,
212
72.3M
          0.1957439937204294f,
213
72.3M
          -0.1621205195722993f,
214
72.3M
          0.0000000000000000,
215
72.3M
          -0.0643507165794628f,
216
72.3M
          0.0074182263792424f,
217
72.3M
          -0.2904801297289980f,
218
72.3M
          0.0952002265347504f,
219
72.3M
          0.0000000000000000,
220
72.3M
          -0.3675398009862027f,
221
72.3M
          0.4921585901373873f,
222
72.3M
          0.2462710772207515f,
223
72.3M
          -0.0794670660590957f,
224
72.3M
          0.3623817333531167f,
225
72.3M
          -0.4351904965232280f,
226
72.3M
      },
227
72.3M
      {
228
72.3M
          0.2500000000000000,
229
72.3M
          -0.1014005039375376f,
230
72.3M
          0.2929100136981264f,
231
72.3M
          0.0000000000000000,
232
72.3M
          0.0000000000000000,
233
72.3M
          -0.0643507165794627f,
234
72.3M
          0.3935103426921017f,
235
72.3M
          -0.0657870154914280f,
236
72.3M
          0.0000000000000000,
237
72.3M
          -0.4082482904638628f,
238
72.3M
          -0.3078822139579090f,
239
72.3M
          -0.3852501370925192f,
240
72.3M
          -0.0857401903551931f,
241
72.3M
          -0.4613374887461511f,
242
72.3M
          0.0000000000000000,
243
72.3M
          0.2191868483885747f,
244
72.3M
      },
245
72.3M
      {
246
72.3M
          0.2500000000000000,
247
72.3M
          -0.1014005039375376f,
248
72.3M
          -0.4067007583026072f,
249
72.3M
          -0.2125574805828705f,
250
72.3M
          0.0000000000000000,
251
72.3M
          -0.0643507165794627f,
252
72.3M
          -0.4517556589999464f,
253
72.3M
          0.3046847507248840f,
254
72.3M
          0.3017929516615503f,
255
72.3M
          -0.4082482904638635f,
256
72.3M
          -0.1747866975480813f,
257
72.3M
          0.2110560104933581f,
258
72.3M
          -0.1426608480880734f,
259
72.3M
          -0.1381354035075829f,
260
72.3M
          -0.1743760259965108f,
261
72.3M
          0.1135498731499426f,
262
72.3M
      },
263
72.3M
      {
264
72.3M
          0.2500000000000000,
265
72.3M
          -0.1014005039375377f,
266
72.3M
          -0.1957439937204287f,
267
72.3M
          -0.1621205195722833f,
268
72.3M
          0.0000000000000000,
269
72.3M
          -0.0643507165794628f,
270
72.3M
          0.0074182263792444f,
271
72.3M
          0.2904801297290076f,
272
72.3M
          0.0952002265347505f,
273
72.3M
          0.0000000000000000,
274
72.3M
          0.3675398009862011f,
275
72.3M
          -0.4921585901373891f,
276
72.3M
          0.2462710772207514f,
277
72.3M
          -0.0794670660591026f,
278
72.3M
          0.3623817333531165f,
279
72.3M
          -0.4351904965232251f,
280
72.3M
      },
281
72.3M
      {
282
72.3M
          0.2500000000000000,
283
72.3M
          -0.1014005039375375f,
284
72.3M
          0.0000000000000000,
285
72.3M
          -0.4706702258572528f,
286
72.3M
          0.0000000000000000,
287
72.3M
          -0.0643507165794627f,
288
72.3M
          0.1107416575309343f,
289
72.3M
          0.0000000000000000,
290
72.3M
          -0.1627234014286617f,
291
72.3M
          0.0000000000000000,
292
72.3M
          0.0000000000000000,
293
72.3M
          0.0000000000000000,
294
72.3M
          0.1488339922711357f,
295
72.3M
          0.4972464710953509f,
296
72.3M
          0.2921026642334879f,
297
72.3M
          0.5550443808910661f,
298
72.3M
      },
299
72.3M
      {
300
72.3M
          0.2500000000000000,
301
72.3M
          -0.1014005039375377f,
302
72.3M
          0.1137907446044809f,
303
72.3M
          -0.1464291867126764f,
304
72.3M
          0.0000000000000000,
305
72.3M
          -0.0643507165794628f,
306
72.3M
          0.0829816309488205f,
307
72.3M
          -0.2388977352334460f,
308
72.3M
          -0.3531238544981630f,
309
72.3M
          -0.4082482904638630f,
310
72.3M
          0.4826689115059883f,
311
72.3M
          0.1741941265991622f,
312
72.3M
          -0.0476868035022925f,
313
72.3M
          0.1253805944856366f,
314
72.3M
          -0.4326608024727445f,
315
72.3M
          -0.2546827712406646f,
316
72.3M
      },
317
72.3M
      {
318
72.3M
          0.2500000000000000,
319
72.3M
          -0.1014005039375377f,
320
72.3M
          -0.4444481661973438f,
321
72.3M
          0.3085497062849487f,
322
72.3M
          0.0000000000000000,
323
72.3M
          -0.0643507165794628f,
324
72.3M
          0.1585450355183970f,
325
72.3M
          -0.5112616136592012f,
326
72.3M
          0.2579236279634129f,
327
72.3M
          0.0000000000000000,
328
72.3M
          -0.0812611176717504f,
329
72.3M
          -0.1856718091610990f,
330
72.3M
          -0.3416446842253373f,
331
72.3M
          0.3302282550303805f,
332
72.3M
          0.0702790691196282f,
333
72.3M
          -0.0741750459581023f,
334
72.3M
      },
335
72.3M
      {
336
72.3M
          0.2500000000000000,
337
72.3M
          -0.1014005039375376f,
338
72.3M
          -0.2929100136981264f,
339
72.3M
          0.0000000000000000,
340
72.3M
          0.0000000000000000,
341
72.3M
          -0.0643507165794627f,
342
72.3M
          0.3935103426921022f,
343
72.3M
          0.0657870154914254f,
344
72.3M
          0.0000000000000000,
345
72.3M
          0.4082482904638634f,
346
72.3M
          0.3078822139579031f,
347
72.3M
          0.3852501370925211f,
348
72.3M
          -0.0857401903551927f,
349
72.3M
          -0.4613374887461554f,
350
72.3M
          0.0000000000000000,
351
72.3M
          0.2191868483885728f,
352
72.3M
      },
353
72.3M
      {
354
72.3M
          0.2500000000000000,
355
72.3M
          -0.1014005039375376f,
356
72.3M
          -0.1137907446044814f,
357
72.3M
          -0.1464291867126654f,
358
72.3M
          0.0000000000000000,
359
72.3M
          -0.0643507165794627f,
360
72.3M
          0.0829816309488214f,
361
72.3M
          0.2388977352334547f,
362
72.3M
          -0.3531238544981624f,
363
72.3M
          0.4082482904638630f,
364
72.3M
          -0.4826689115059858f,
365
72.3M
          -0.1741941265991621f,
366
72.3M
          -0.0476868035022928f,
367
72.3M
          0.1253805944856431f,
368
72.3M
          -0.4326608024727457f,
369
72.3M
          -0.2546827712406641f,
370
72.3M
      },
371
72.3M
      {
372
72.3M
          0.2500000000000000,
373
72.3M
          -0.1014005039375374f,
374
72.3M
          0.0000000000000000,
375
72.3M
          0.4251149611657548f,
376
72.3M
          0.0000000000000000,
377
72.3M
          -0.0643507165794626f,
378
72.3M
          -0.4517556589999480f,
379
72.3M
          0.0000000000000000,
380
72.3M
          -0.6035859033230976f,
381
72.3M
          0.0000000000000000,
382
72.3M
          0.0000000000000000,
383
72.3M
          0.0000000000000000,
384
72.3M
          -0.1426608480880724f,
385
72.3M
          -0.1381354035075845f,
386
72.3M
          0.3487520519930227f,
387
72.3M
          0.1135498731499429f,
388
72.3M
      },
389
72.3M
  };
390
391
72.3M
  const HWY_CAPPED(float, 16) d;
392
217M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
144M
    auto scalar = Zero(d);
394
2.45G
    for (size_t j = 0; j < 16; j++) {
395
2.31G
      auto px = Set(d, pixels[j]);
396
2.31G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.31G
      scalar = MulAdd(px, basis, scalar);
398
2.31G
    }
399
144M
    Store(scalar, d, coeffs + i);
400
144M
  }
401
72.3M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.57M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.57M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.57M
      {
102
1.57M
          0.2500000000000000,
103
1.57M
          0.8769029297991420f,
104
1.57M
          0.0000000000000000,
105
1.57M
          0.0000000000000000,
106
1.57M
          0.0000000000000000,
107
1.57M
          -0.4105377591765233f,
108
1.57M
          0.0000000000000000,
109
1.57M
          0.0000000000000000,
110
1.57M
          0.0000000000000000,
111
1.57M
          0.0000000000000000,
112
1.57M
          0.0000000000000000,
113
1.57M
          0.0000000000000000,
114
1.57M
          0.0000000000000000,
115
1.57M
          0.0000000000000000,
116
1.57M
          0.0000000000000000,
117
1.57M
          0.0000000000000000,
118
1.57M
      },
119
1.57M
      {
120
1.57M
          0.2500000000000000,
121
1.57M
          0.2206518106944235f,
122
1.57M
          0.0000000000000000,
123
1.57M
          0.0000000000000000,
124
1.57M
          -0.7071067811865474f,
125
1.57M
          0.6235485373547691f,
126
1.57M
          0.0000000000000000,
127
1.57M
          0.0000000000000000,
128
1.57M
          0.0000000000000000,
129
1.57M
          0.0000000000000000,
130
1.57M
          0.0000000000000000,
131
1.57M
          0.0000000000000000,
132
1.57M
          0.0000000000000000,
133
1.57M
          0.0000000000000000,
134
1.57M
          0.0000000000000000,
135
1.57M
          0.0000000000000000,
136
1.57M
      },
137
1.57M
      {
138
1.57M
          0.2500000000000000,
139
1.57M
          -0.1014005039375376f,
140
1.57M
          0.4067007583026075f,
141
1.57M
          -0.2125574805828875f,
142
1.57M
          0.0000000000000000,
143
1.57M
          -0.0643507165794627f,
144
1.57M
          -0.4517556589999482f,
145
1.57M
          -0.3046847507248690f,
146
1.57M
          0.3017929516615495f,
147
1.57M
          0.4082482904638627f,
148
1.57M
          0.1747866975480809f,
149
1.57M
          -0.2110560104933578f,
150
1.57M
          -0.1426608480880726f,
151
1.57M
          -0.1381354035075859f,
152
1.57M
          -0.1743760259965107f,
153
1.57M
          0.1135498731499434f,
154
1.57M
      },
155
1.57M
      {
156
1.57M
          0.2500000000000000,
157
1.57M
          -0.1014005039375375f,
158
1.57M
          0.4444481661973445f,
159
1.57M
          0.3085497062849767f,
160
1.57M
          0.0000000000000000f,
161
1.57M
          -0.0643507165794627f,
162
1.57M
          0.1585450355184006f,
163
1.57M
          0.5112616136591823f,
164
1.57M
          0.2579236279634118f,
165
1.57M
          0.0000000000000000,
166
1.57M
          0.0812611176717539f,
167
1.57M
          0.1856718091610980f,
168
1.57M
          -0.3416446842253372f,
169
1.57M
          0.3302282550303788f,
170
1.57M
          0.0702790691196284f,
171
1.57M
          -0.0741750459581035f,
172
1.57M
      },
173
1.57M
      {
174
1.57M
          0.2500000000000000,
175
1.57M
          0.2206518106944236f,
176
1.57M
          0.0000000000000000,
177
1.57M
          0.0000000000000000,
178
1.57M
          0.7071067811865476f,
179
1.57M
          0.6235485373547694f,
180
1.57M
          0.0000000000000000,
181
1.57M
          0.0000000000000000,
182
1.57M
          0.0000000000000000,
183
1.57M
          0.0000000000000000,
184
1.57M
          0.0000000000000000,
185
1.57M
          0.0000000000000000,
186
1.57M
          0.0000000000000000,
187
1.57M
          0.0000000000000000,
188
1.57M
          0.0000000000000000,
189
1.57M
          0.0000000000000000,
190
1.57M
      },
191
1.57M
      {
192
1.57M
          0.2500000000000000,
193
1.57M
          -0.1014005039375378f,
194
1.57M
          0.0000000000000000,
195
1.57M
          0.4706702258572536f,
196
1.57M
          0.0000000000000000,
197
1.57M
          -0.0643507165794628f,
198
1.57M
          -0.0403851516082220f,
199
1.57M
          0.0000000000000000,
200
1.57M
          0.1627234014286620f,
201
1.57M
          0.0000000000000000,
202
1.57M
          0.0000000000000000,
203
1.57M
          0.0000000000000000,
204
1.57M
          0.7367497537172237f,
205
1.57M
          0.0875511500058708f,
206
1.57M
          -0.2921026642334881f,
207
1.57M
          0.1940289303259434f,
208
1.57M
      },
209
1.57M
      {
210
1.57M
          0.2500000000000000,
211
1.57M
          -0.1014005039375377f,
212
1.57M
          0.1957439937204294f,
213
1.57M
          -0.1621205195722993f,
214
1.57M
          0.0000000000000000,
215
1.57M
          -0.0643507165794628f,
216
1.57M
          0.0074182263792424f,
217
1.57M
          -0.2904801297289980f,
218
1.57M
          0.0952002265347504f,
219
1.57M
          0.0000000000000000,
220
1.57M
          -0.3675398009862027f,
221
1.57M
          0.4921585901373873f,
222
1.57M
          0.2462710772207515f,
223
1.57M
          -0.0794670660590957f,
224
1.57M
          0.3623817333531167f,
225
1.57M
          -0.4351904965232280f,
226
1.57M
      },
227
1.57M
      {
228
1.57M
          0.2500000000000000,
229
1.57M
          -0.1014005039375376f,
230
1.57M
          0.2929100136981264f,
231
1.57M
          0.0000000000000000,
232
1.57M
          0.0000000000000000,
233
1.57M
          -0.0643507165794627f,
234
1.57M
          0.3935103426921017f,
235
1.57M
          -0.0657870154914280f,
236
1.57M
          0.0000000000000000,
237
1.57M
          -0.4082482904638628f,
238
1.57M
          -0.3078822139579090f,
239
1.57M
          -0.3852501370925192f,
240
1.57M
          -0.0857401903551931f,
241
1.57M
          -0.4613374887461511f,
242
1.57M
          0.0000000000000000,
243
1.57M
          0.2191868483885747f,
244
1.57M
      },
245
1.57M
      {
246
1.57M
          0.2500000000000000,
247
1.57M
          -0.1014005039375376f,
248
1.57M
          -0.4067007583026072f,
249
1.57M
          -0.2125574805828705f,
250
1.57M
          0.0000000000000000,
251
1.57M
          -0.0643507165794627f,
252
1.57M
          -0.4517556589999464f,
253
1.57M
          0.3046847507248840f,
254
1.57M
          0.3017929516615503f,
255
1.57M
          -0.4082482904638635f,
256
1.57M
          -0.1747866975480813f,
257
1.57M
          0.2110560104933581f,
258
1.57M
          -0.1426608480880734f,
259
1.57M
          -0.1381354035075829f,
260
1.57M
          -0.1743760259965108f,
261
1.57M
          0.1135498731499426f,
262
1.57M
      },
263
1.57M
      {
264
1.57M
          0.2500000000000000,
265
1.57M
          -0.1014005039375377f,
266
1.57M
          -0.1957439937204287f,
267
1.57M
          -0.1621205195722833f,
268
1.57M
          0.0000000000000000,
269
1.57M
          -0.0643507165794628f,
270
1.57M
          0.0074182263792444f,
271
1.57M
          0.2904801297290076f,
272
1.57M
          0.0952002265347505f,
273
1.57M
          0.0000000000000000,
274
1.57M
          0.3675398009862011f,
275
1.57M
          -0.4921585901373891f,
276
1.57M
          0.2462710772207514f,
277
1.57M
          -0.0794670660591026f,
278
1.57M
          0.3623817333531165f,
279
1.57M
          -0.4351904965232251f,
280
1.57M
      },
281
1.57M
      {
282
1.57M
          0.2500000000000000,
283
1.57M
          -0.1014005039375375f,
284
1.57M
          0.0000000000000000,
285
1.57M
          -0.4706702258572528f,
286
1.57M
          0.0000000000000000,
287
1.57M
          -0.0643507165794627f,
288
1.57M
          0.1107416575309343f,
289
1.57M
          0.0000000000000000,
290
1.57M
          -0.1627234014286617f,
291
1.57M
          0.0000000000000000,
292
1.57M
          0.0000000000000000,
293
1.57M
          0.0000000000000000,
294
1.57M
          0.1488339922711357f,
295
1.57M
          0.4972464710953509f,
296
1.57M
          0.2921026642334879f,
297
1.57M
          0.5550443808910661f,
298
1.57M
      },
299
1.57M
      {
300
1.57M
          0.2500000000000000,
301
1.57M
          -0.1014005039375377f,
302
1.57M
          0.1137907446044809f,
303
1.57M
          -0.1464291867126764f,
304
1.57M
          0.0000000000000000,
305
1.57M
          -0.0643507165794628f,
306
1.57M
          0.0829816309488205f,
307
1.57M
          -0.2388977352334460f,
308
1.57M
          -0.3531238544981630f,
309
1.57M
          -0.4082482904638630f,
310
1.57M
          0.4826689115059883f,
311
1.57M
          0.1741941265991622f,
312
1.57M
          -0.0476868035022925f,
313
1.57M
          0.1253805944856366f,
314
1.57M
          -0.4326608024727445f,
315
1.57M
          -0.2546827712406646f,
316
1.57M
      },
317
1.57M
      {
318
1.57M
          0.2500000000000000,
319
1.57M
          -0.1014005039375377f,
320
1.57M
          -0.4444481661973438f,
321
1.57M
          0.3085497062849487f,
322
1.57M
          0.0000000000000000,
323
1.57M
          -0.0643507165794628f,
324
1.57M
          0.1585450355183970f,
325
1.57M
          -0.5112616136592012f,
326
1.57M
          0.2579236279634129f,
327
1.57M
          0.0000000000000000,
328
1.57M
          -0.0812611176717504f,
329
1.57M
          -0.1856718091610990f,
330
1.57M
          -0.3416446842253373f,
331
1.57M
          0.3302282550303805f,
332
1.57M
          0.0702790691196282f,
333
1.57M
          -0.0741750459581023f,
334
1.57M
      },
335
1.57M
      {
336
1.57M
          0.2500000000000000,
337
1.57M
          -0.1014005039375376f,
338
1.57M
          -0.2929100136981264f,
339
1.57M
          0.0000000000000000,
340
1.57M
          0.0000000000000000,
341
1.57M
          -0.0643507165794627f,
342
1.57M
          0.3935103426921022f,
343
1.57M
          0.0657870154914254f,
344
1.57M
          0.0000000000000000,
345
1.57M
          0.4082482904638634f,
346
1.57M
          0.3078822139579031f,
347
1.57M
          0.3852501370925211f,
348
1.57M
          -0.0857401903551927f,
349
1.57M
          -0.4613374887461554f,
350
1.57M
          0.0000000000000000,
351
1.57M
          0.2191868483885728f,
352
1.57M
      },
353
1.57M
      {
354
1.57M
          0.2500000000000000,
355
1.57M
          -0.1014005039375376f,
356
1.57M
          -0.1137907446044814f,
357
1.57M
          -0.1464291867126654f,
358
1.57M
          0.0000000000000000,
359
1.57M
          -0.0643507165794627f,
360
1.57M
          0.0829816309488214f,
361
1.57M
          0.2388977352334547f,
362
1.57M
          -0.3531238544981624f,
363
1.57M
          0.4082482904638630f,
364
1.57M
          -0.4826689115059858f,
365
1.57M
          -0.1741941265991621f,
366
1.57M
          -0.0476868035022928f,
367
1.57M
          0.1253805944856431f,
368
1.57M
          -0.4326608024727457f,
369
1.57M
          -0.2546827712406641f,
370
1.57M
      },
371
1.57M
      {
372
1.57M
          0.2500000000000000,
373
1.57M
          -0.1014005039375374f,
374
1.57M
          0.0000000000000000,
375
1.57M
          0.4251149611657548f,
376
1.57M
          0.0000000000000000,
377
1.57M
          -0.0643507165794626f,
378
1.57M
          -0.4517556589999480f,
379
1.57M
          0.0000000000000000,
380
1.57M
          -0.6035859033230976f,
381
1.57M
          0.0000000000000000,
382
1.57M
          0.0000000000000000,
383
1.57M
          0.0000000000000000,
384
1.57M
          -0.1426608480880724f,
385
1.57M
          -0.1381354035075845f,
386
1.57M
          0.3487520519930227f,
387
1.57M
          0.1135498731499429f,
388
1.57M
      },
389
1.57M
  };
390
391
1.57M
  const HWY_CAPPED(float, 16) d;
392
4.72M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.15M
    auto scalar = Zero(d);
394
53.5M
    for (size_t j = 0; j < 16; j++) {
395
50.4M
      auto px = Set(d, pixels[j]);
396
50.4M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
50.4M
      scalar = MulAdd(px, basis, scalar);
398
50.4M
    }
399
3.15M
    Store(scalar, d, coeffs + i);
400
3.15M
  }
401
1.57M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
69.1M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
69.1M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
69.1M
      {
102
69.1M
          0.2500000000000000,
103
69.1M
          0.8769029297991420f,
104
69.1M
          0.0000000000000000,
105
69.1M
          0.0000000000000000,
106
69.1M
          0.0000000000000000,
107
69.1M
          -0.4105377591765233f,
108
69.1M
          0.0000000000000000,
109
69.1M
          0.0000000000000000,
110
69.1M
          0.0000000000000000,
111
69.1M
          0.0000000000000000,
112
69.1M
          0.0000000000000000,
113
69.1M
          0.0000000000000000,
114
69.1M
          0.0000000000000000,
115
69.1M
          0.0000000000000000,
116
69.1M
          0.0000000000000000,
117
69.1M
          0.0000000000000000,
118
69.1M
      },
119
69.1M
      {
120
69.1M
          0.2500000000000000,
121
69.1M
          0.2206518106944235f,
122
69.1M
          0.0000000000000000,
123
69.1M
          0.0000000000000000,
124
69.1M
          -0.7071067811865474f,
125
69.1M
          0.6235485373547691f,
126
69.1M
          0.0000000000000000,
127
69.1M
          0.0000000000000000,
128
69.1M
          0.0000000000000000,
129
69.1M
          0.0000000000000000,
130
69.1M
          0.0000000000000000,
131
69.1M
          0.0000000000000000,
132
69.1M
          0.0000000000000000,
133
69.1M
          0.0000000000000000,
134
69.1M
          0.0000000000000000,
135
69.1M
          0.0000000000000000,
136
69.1M
      },
137
69.1M
      {
138
69.1M
          0.2500000000000000,
139
69.1M
          -0.1014005039375376f,
140
69.1M
          0.4067007583026075f,
141
69.1M
          -0.2125574805828875f,
142
69.1M
          0.0000000000000000,
143
69.1M
          -0.0643507165794627f,
144
69.1M
          -0.4517556589999482f,
145
69.1M
          -0.3046847507248690f,
146
69.1M
          0.3017929516615495f,
147
69.1M
          0.4082482904638627f,
148
69.1M
          0.1747866975480809f,
149
69.1M
          -0.2110560104933578f,
150
69.1M
          -0.1426608480880726f,
151
69.1M
          -0.1381354035075859f,
152
69.1M
          -0.1743760259965107f,
153
69.1M
          0.1135498731499434f,
154
69.1M
      },
155
69.1M
      {
156
69.1M
          0.2500000000000000,
157
69.1M
          -0.1014005039375375f,
158
69.1M
          0.4444481661973445f,
159
69.1M
          0.3085497062849767f,
160
69.1M
          0.0000000000000000f,
161
69.1M
          -0.0643507165794627f,
162
69.1M
          0.1585450355184006f,
163
69.1M
          0.5112616136591823f,
164
69.1M
          0.2579236279634118f,
165
69.1M
          0.0000000000000000,
166
69.1M
          0.0812611176717539f,
167
69.1M
          0.1856718091610980f,
168
69.1M
          -0.3416446842253372f,
169
69.1M
          0.3302282550303788f,
170
69.1M
          0.0702790691196284f,
171
69.1M
          -0.0741750459581035f,
172
69.1M
      },
173
69.1M
      {
174
69.1M
          0.2500000000000000,
175
69.1M
          0.2206518106944236f,
176
69.1M
          0.0000000000000000,
177
69.1M
          0.0000000000000000,
178
69.1M
          0.7071067811865476f,
179
69.1M
          0.6235485373547694f,
180
69.1M
          0.0000000000000000,
181
69.1M
          0.0000000000000000,
182
69.1M
          0.0000000000000000,
183
69.1M
          0.0000000000000000,
184
69.1M
          0.0000000000000000,
185
69.1M
          0.0000000000000000,
186
69.1M
          0.0000000000000000,
187
69.1M
          0.0000000000000000,
188
69.1M
          0.0000000000000000,
189
69.1M
          0.0000000000000000,
190
69.1M
      },
191
69.1M
      {
192
69.1M
          0.2500000000000000,
193
69.1M
          -0.1014005039375378f,
194
69.1M
          0.0000000000000000,
195
69.1M
          0.4706702258572536f,
196
69.1M
          0.0000000000000000,
197
69.1M
          -0.0643507165794628f,
198
69.1M
          -0.0403851516082220f,
199
69.1M
          0.0000000000000000,
200
69.1M
          0.1627234014286620f,
201
69.1M
          0.0000000000000000,
202
69.1M
          0.0000000000000000,
203
69.1M
          0.0000000000000000,
204
69.1M
          0.7367497537172237f,
205
69.1M
          0.0875511500058708f,
206
69.1M
          -0.2921026642334881f,
207
69.1M
          0.1940289303259434f,
208
69.1M
      },
209
69.1M
      {
210
69.1M
          0.2500000000000000,
211
69.1M
          -0.1014005039375377f,
212
69.1M
          0.1957439937204294f,
213
69.1M
          -0.1621205195722993f,
214
69.1M
          0.0000000000000000,
215
69.1M
          -0.0643507165794628f,
216
69.1M
          0.0074182263792424f,
217
69.1M
          -0.2904801297289980f,
218
69.1M
          0.0952002265347504f,
219
69.1M
          0.0000000000000000,
220
69.1M
          -0.3675398009862027f,
221
69.1M
          0.4921585901373873f,
222
69.1M
          0.2462710772207515f,
223
69.1M
          -0.0794670660590957f,
224
69.1M
          0.3623817333531167f,
225
69.1M
          -0.4351904965232280f,
226
69.1M
      },
227
69.1M
      {
228
69.1M
          0.2500000000000000,
229
69.1M
          -0.1014005039375376f,
230
69.1M
          0.2929100136981264f,
231
69.1M
          0.0000000000000000,
232
69.1M
          0.0000000000000000,
233
69.1M
          -0.0643507165794627f,
234
69.1M
          0.3935103426921017f,
235
69.1M
          -0.0657870154914280f,
236
69.1M
          0.0000000000000000,
237
69.1M
          -0.4082482904638628f,
238
69.1M
          -0.3078822139579090f,
239
69.1M
          -0.3852501370925192f,
240
69.1M
          -0.0857401903551931f,
241
69.1M
          -0.4613374887461511f,
242
69.1M
          0.0000000000000000,
243
69.1M
          0.2191868483885747f,
244
69.1M
      },
245
69.1M
      {
246
69.1M
          0.2500000000000000,
247
69.1M
          -0.1014005039375376f,
248
69.1M
          -0.4067007583026072f,
249
69.1M
          -0.2125574805828705f,
250
69.1M
          0.0000000000000000,
251
69.1M
          -0.0643507165794627f,
252
69.1M
          -0.4517556589999464f,
253
69.1M
          0.3046847507248840f,
254
69.1M
          0.3017929516615503f,
255
69.1M
          -0.4082482904638635f,
256
69.1M
          -0.1747866975480813f,
257
69.1M
          0.2110560104933581f,
258
69.1M
          -0.1426608480880734f,
259
69.1M
          -0.1381354035075829f,
260
69.1M
          -0.1743760259965108f,
261
69.1M
          0.1135498731499426f,
262
69.1M
      },
263
69.1M
      {
264
69.1M
          0.2500000000000000,
265
69.1M
          -0.1014005039375377f,
266
69.1M
          -0.1957439937204287f,
267
69.1M
          -0.1621205195722833f,
268
69.1M
          0.0000000000000000,
269
69.1M
          -0.0643507165794628f,
270
69.1M
          0.0074182263792444f,
271
69.1M
          0.2904801297290076f,
272
69.1M
          0.0952002265347505f,
273
69.1M
          0.0000000000000000,
274
69.1M
          0.3675398009862011f,
275
69.1M
          -0.4921585901373891f,
276
69.1M
          0.2462710772207514f,
277
69.1M
          -0.0794670660591026f,
278
69.1M
          0.3623817333531165f,
279
69.1M
          -0.4351904965232251f,
280
69.1M
      },
281
69.1M
      {
282
69.1M
          0.2500000000000000,
283
69.1M
          -0.1014005039375375f,
284
69.1M
          0.0000000000000000,
285
69.1M
          -0.4706702258572528f,
286
69.1M
          0.0000000000000000,
287
69.1M
          -0.0643507165794627f,
288
69.1M
          0.1107416575309343f,
289
69.1M
          0.0000000000000000,
290
69.1M
          -0.1627234014286617f,
291
69.1M
          0.0000000000000000,
292
69.1M
          0.0000000000000000,
293
69.1M
          0.0000000000000000,
294
69.1M
          0.1488339922711357f,
295
69.1M
          0.4972464710953509f,
296
69.1M
          0.2921026642334879f,
297
69.1M
          0.5550443808910661f,
298
69.1M
      },
299
69.1M
      {
300
69.1M
          0.2500000000000000,
301
69.1M
          -0.1014005039375377f,
302
69.1M
          0.1137907446044809f,
303
69.1M
          -0.1464291867126764f,
304
69.1M
          0.0000000000000000,
305
69.1M
          -0.0643507165794628f,
306
69.1M
          0.0829816309488205f,
307
69.1M
          -0.2388977352334460f,
308
69.1M
          -0.3531238544981630f,
309
69.1M
          -0.4082482904638630f,
310
69.1M
          0.4826689115059883f,
311
69.1M
          0.1741941265991622f,
312
69.1M
          -0.0476868035022925f,
313
69.1M
          0.1253805944856366f,
314
69.1M
          -0.4326608024727445f,
315
69.1M
          -0.2546827712406646f,
316
69.1M
      },
317
69.1M
      {
318
69.1M
          0.2500000000000000,
319
69.1M
          -0.1014005039375377f,
320
69.1M
          -0.4444481661973438f,
321
69.1M
          0.3085497062849487f,
322
69.1M
          0.0000000000000000,
323
69.1M
          -0.0643507165794628f,
324
69.1M
          0.1585450355183970f,
325
69.1M
          -0.5112616136592012f,
326
69.1M
          0.2579236279634129f,
327
69.1M
          0.0000000000000000,
328
69.1M
          -0.0812611176717504f,
329
69.1M
          -0.1856718091610990f,
330
69.1M
          -0.3416446842253373f,
331
69.1M
          0.3302282550303805f,
332
69.1M
          0.0702790691196282f,
333
69.1M
          -0.0741750459581023f,
334
69.1M
      },
335
69.1M
      {
336
69.1M
          0.2500000000000000,
337
69.1M
          -0.1014005039375376f,
338
69.1M
          -0.2929100136981264f,
339
69.1M
          0.0000000000000000,
340
69.1M
          0.0000000000000000,
341
69.1M
          -0.0643507165794627f,
342
69.1M
          0.3935103426921022f,
343
69.1M
          0.0657870154914254f,
344
69.1M
          0.0000000000000000,
345
69.1M
          0.4082482904638634f,
346
69.1M
          0.3078822139579031f,
347
69.1M
          0.3852501370925211f,
348
69.1M
          -0.0857401903551927f,
349
69.1M
          -0.4613374887461554f,
350
69.1M
          0.0000000000000000,
351
69.1M
          0.2191868483885728f,
352
69.1M
      },
353
69.1M
      {
354
69.1M
          0.2500000000000000,
355
69.1M
          -0.1014005039375376f,
356
69.1M
          -0.1137907446044814f,
357
69.1M
          -0.1464291867126654f,
358
69.1M
          0.0000000000000000,
359
69.1M
          -0.0643507165794627f,
360
69.1M
          0.0829816309488214f,
361
69.1M
          0.2388977352334547f,
362
69.1M
          -0.3531238544981624f,
363
69.1M
          0.4082482904638630f,
364
69.1M
          -0.4826689115059858f,
365
69.1M
          -0.1741941265991621f,
366
69.1M
          -0.0476868035022928f,
367
69.1M
          0.1253805944856431f,
368
69.1M
          -0.4326608024727457f,
369
69.1M
          -0.2546827712406641f,
370
69.1M
      },
371
69.1M
      {
372
69.1M
          0.2500000000000000,
373
69.1M
          -0.1014005039375374f,
374
69.1M
          0.0000000000000000,
375
69.1M
          0.4251149611657548f,
376
69.1M
          0.0000000000000000,
377
69.1M
          -0.0643507165794626f,
378
69.1M
          -0.4517556589999480f,
379
69.1M
          0.0000000000000000,
380
69.1M
          -0.6035859033230976f,
381
69.1M
          0.0000000000000000,
382
69.1M
          0.0000000000000000,
383
69.1M
          0.0000000000000000,
384
69.1M
          -0.1426608480880724f,
385
69.1M
          -0.1381354035075845f,
386
69.1M
          0.3487520519930227f,
387
69.1M
          0.1135498731499429f,
388
69.1M
      },
389
69.1M
  };
390
391
69.1M
  const HWY_CAPPED(float, 16) d;
392
207M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
138M
    auto scalar = Zero(d);
394
2.35G
    for (size_t j = 0; j < 16; j++) {
395
2.21G
      auto px = Set(d, pixels[j]);
396
2.21G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.21G
      scalar = MulAdd(px, basis, scalar);
398
2.21G
    }
399
138M
    Store(scalar, d, coeffs + i);
400
138M
  }
401
69.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.57M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.57M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.57M
      {
102
1.57M
          0.2500000000000000,
103
1.57M
          0.8769029297991420f,
104
1.57M
          0.0000000000000000,
105
1.57M
          0.0000000000000000,
106
1.57M
          0.0000000000000000,
107
1.57M
          -0.4105377591765233f,
108
1.57M
          0.0000000000000000,
109
1.57M
          0.0000000000000000,
110
1.57M
          0.0000000000000000,
111
1.57M
          0.0000000000000000,
112
1.57M
          0.0000000000000000,
113
1.57M
          0.0000000000000000,
114
1.57M
          0.0000000000000000,
115
1.57M
          0.0000000000000000,
116
1.57M
          0.0000000000000000,
117
1.57M
          0.0000000000000000,
118
1.57M
      },
119
1.57M
      {
120
1.57M
          0.2500000000000000,
121
1.57M
          0.2206518106944235f,
122
1.57M
          0.0000000000000000,
123
1.57M
          0.0000000000000000,
124
1.57M
          -0.7071067811865474f,
125
1.57M
          0.6235485373547691f,
126
1.57M
          0.0000000000000000,
127
1.57M
          0.0000000000000000,
128
1.57M
          0.0000000000000000,
129
1.57M
          0.0000000000000000,
130
1.57M
          0.0000000000000000,
131
1.57M
          0.0000000000000000,
132
1.57M
          0.0000000000000000,
133
1.57M
          0.0000000000000000,
134
1.57M
          0.0000000000000000,
135
1.57M
          0.0000000000000000,
136
1.57M
      },
137
1.57M
      {
138
1.57M
          0.2500000000000000,
139
1.57M
          -0.1014005039375376f,
140
1.57M
          0.4067007583026075f,
141
1.57M
          -0.2125574805828875f,
142
1.57M
          0.0000000000000000,
143
1.57M
          -0.0643507165794627f,
144
1.57M
          -0.4517556589999482f,
145
1.57M
          -0.3046847507248690f,
146
1.57M
          0.3017929516615495f,
147
1.57M
          0.4082482904638627f,
148
1.57M
          0.1747866975480809f,
149
1.57M
          -0.2110560104933578f,
150
1.57M
          -0.1426608480880726f,
151
1.57M
          -0.1381354035075859f,
152
1.57M
          -0.1743760259965107f,
153
1.57M
          0.1135498731499434f,
154
1.57M
      },
155
1.57M
      {
156
1.57M
          0.2500000000000000,
157
1.57M
          -0.1014005039375375f,
158
1.57M
          0.4444481661973445f,
159
1.57M
          0.3085497062849767f,
160
1.57M
          0.0000000000000000f,
161
1.57M
          -0.0643507165794627f,
162
1.57M
          0.1585450355184006f,
163
1.57M
          0.5112616136591823f,
164
1.57M
          0.2579236279634118f,
165
1.57M
          0.0000000000000000,
166
1.57M
          0.0812611176717539f,
167
1.57M
          0.1856718091610980f,
168
1.57M
          -0.3416446842253372f,
169
1.57M
          0.3302282550303788f,
170
1.57M
          0.0702790691196284f,
171
1.57M
          -0.0741750459581035f,
172
1.57M
      },
173
1.57M
      {
174
1.57M
          0.2500000000000000,
175
1.57M
          0.2206518106944236f,
176
1.57M
          0.0000000000000000,
177
1.57M
          0.0000000000000000,
178
1.57M
          0.7071067811865476f,
179
1.57M
          0.6235485373547694f,
180
1.57M
          0.0000000000000000,
181
1.57M
          0.0000000000000000,
182
1.57M
          0.0000000000000000,
183
1.57M
          0.0000000000000000,
184
1.57M
          0.0000000000000000,
185
1.57M
          0.0000000000000000,
186
1.57M
          0.0000000000000000,
187
1.57M
          0.0000000000000000,
188
1.57M
          0.0000000000000000,
189
1.57M
          0.0000000000000000,
190
1.57M
      },
191
1.57M
      {
192
1.57M
          0.2500000000000000,
193
1.57M
          -0.1014005039375378f,
194
1.57M
          0.0000000000000000,
195
1.57M
          0.4706702258572536f,
196
1.57M
          0.0000000000000000,
197
1.57M
          -0.0643507165794628f,
198
1.57M
          -0.0403851516082220f,
199
1.57M
          0.0000000000000000,
200
1.57M
          0.1627234014286620f,
201
1.57M
          0.0000000000000000,
202
1.57M
          0.0000000000000000,
203
1.57M
          0.0000000000000000,
204
1.57M
          0.7367497537172237f,
205
1.57M
          0.0875511500058708f,
206
1.57M
          -0.2921026642334881f,
207
1.57M
          0.1940289303259434f,
208
1.57M
      },
209
1.57M
      {
210
1.57M
          0.2500000000000000,
211
1.57M
          -0.1014005039375377f,
212
1.57M
          0.1957439937204294f,
213
1.57M
          -0.1621205195722993f,
214
1.57M
          0.0000000000000000,
215
1.57M
          -0.0643507165794628f,
216
1.57M
          0.0074182263792424f,
217
1.57M
          -0.2904801297289980f,
218
1.57M
          0.0952002265347504f,
219
1.57M
          0.0000000000000000,
220
1.57M
          -0.3675398009862027f,
221
1.57M
          0.4921585901373873f,
222
1.57M
          0.2462710772207515f,
223
1.57M
          -0.0794670660590957f,
224
1.57M
          0.3623817333531167f,
225
1.57M
          -0.4351904965232280f,
226
1.57M
      },
227
1.57M
      {
228
1.57M
          0.2500000000000000,
229
1.57M
          -0.1014005039375376f,
230
1.57M
          0.2929100136981264f,
231
1.57M
          0.0000000000000000,
232
1.57M
          0.0000000000000000,
233
1.57M
          -0.0643507165794627f,
234
1.57M
          0.3935103426921017f,
235
1.57M
          -0.0657870154914280f,
236
1.57M
          0.0000000000000000,
237
1.57M
          -0.4082482904638628f,
238
1.57M
          -0.3078822139579090f,
239
1.57M
          -0.3852501370925192f,
240
1.57M
          -0.0857401903551931f,
241
1.57M
          -0.4613374887461511f,
242
1.57M
          0.0000000000000000,
243
1.57M
          0.2191868483885747f,
244
1.57M
      },
245
1.57M
      {
246
1.57M
          0.2500000000000000,
247
1.57M
          -0.1014005039375376f,
248
1.57M
          -0.4067007583026072f,
249
1.57M
          -0.2125574805828705f,
250
1.57M
          0.0000000000000000,
251
1.57M
          -0.0643507165794627f,
252
1.57M
          -0.4517556589999464f,
253
1.57M
          0.3046847507248840f,
254
1.57M
          0.3017929516615503f,
255
1.57M
          -0.4082482904638635f,
256
1.57M
          -0.1747866975480813f,
257
1.57M
          0.2110560104933581f,
258
1.57M
          -0.1426608480880734f,
259
1.57M
          -0.1381354035075829f,
260
1.57M
          -0.1743760259965108f,
261
1.57M
          0.1135498731499426f,
262
1.57M
      },
263
1.57M
      {
264
1.57M
          0.2500000000000000,
265
1.57M
          -0.1014005039375377f,
266
1.57M
          -0.1957439937204287f,
267
1.57M
          -0.1621205195722833f,
268
1.57M
          0.0000000000000000,
269
1.57M
          -0.0643507165794628f,
270
1.57M
          0.0074182263792444f,
271
1.57M
          0.2904801297290076f,
272
1.57M
          0.0952002265347505f,
273
1.57M
          0.0000000000000000,
274
1.57M
          0.3675398009862011f,
275
1.57M
          -0.4921585901373891f,
276
1.57M
          0.2462710772207514f,
277
1.57M
          -0.0794670660591026f,
278
1.57M
          0.3623817333531165f,
279
1.57M
          -0.4351904965232251f,
280
1.57M
      },
281
1.57M
      {
282
1.57M
          0.2500000000000000,
283
1.57M
          -0.1014005039375375f,
284
1.57M
          0.0000000000000000,
285
1.57M
          -0.4706702258572528f,
286
1.57M
          0.0000000000000000,
287
1.57M
          -0.0643507165794627f,
288
1.57M
          0.1107416575309343f,
289
1.57M
          0.0000000000000000,
290
1.57M
          -0.1627234014286617f,
291
1.57M
          0.0000000000000000,
292
1.57M
          0.0000000000000000,
293
1.57M
          0.0000000000000000,
294
1.57M
          0.1488339922711357f,
295
1.57M
          0.4972464710953509f,
296
1.57M
          0.2921026642334879f,
297
1.57M
          0.5550443808910661f,
298
1.57M
      },
299
1.57M
      {
300
1.57M
          0.2500000000000000,
301
1.57M
          -0.1014005039375377f,
302
1.57M
          0.1137907446044809f,
303
1.57M
          -0.1464291867126764f,
304
1.57M
          0.0000000000000000,
305
1.57M
          -0.0643507165794628f,
306
1.57M
          0.0829816309488205f,
307
1.57M
          -0.2388977352334460f,
308
1.57M
          -0.3531238544981630f,
309
1.57M
          -0.4082482904638630f,
310
1.57M
          0.4826689115059883f,
311
1.57M
          0.1741941265991622f,
312
1.57M
          -0.0476868035022925f,
313
1.57M
          0.1253805944856366f,
314
1.57M
          -0.4326608024727445f,
315
1.57M
          -0.2546827712406646f,
316
1.57M
      },
317
1.57M
      {
318
1.57M
          0.2500000000000000,
319
1.57M
          -0.1014005039375377f,
320
1.57M
          -0.4444481661973438f,
321
1.57M
          0.3085497062849487f,
322
1.57M
          0.0000000000000000,
323
1.57M
          -0.0643507165794628f,
324
1.57M
          0.1585450355183970f,
325
1.57M
          -0.5112616136592012f,
326
1.57M
          0.2579236279634129f,
327
1.57M
          0.0000000000000000,
328
1.57M
          -0.0812611176717504f,
329
1.57M
          -0.1856718091610990f,
330
1.57M
          -0.3416446842253373f,
331
1.57M
          0.3302282550303805f,
332
1.57M
          0.0702790691196282f,
333
1.57M
          -0.0741750459581023f,
334
1.57M
      },
335
1.57M
      {
336
1.57M
          0.2500000000000000,
337
1.57M
          -0.1014005039375376f,
338
1.57M
          -0.2929100136981264f,
339
1.57M
          0.0000000000000000,
340
1.57M
          0.0000000000000000,
341
1.57M
          -0.0643507165794627f,
342
1.57M
          0.3935103426921022f,
343
1.57M
          0.0657870154914254f,
344
1.57M
          0.0000000000000000,
345
1.57M
          0.4082482904638634f,
346
1.57M
          0.3078822139579031f,
347
1.57M
          0.3852501370925211f,
348
1.57M
          -0.0857401903551927f,
349
1.57M
          -0.4613374887461554f,
350
1.57M
          0.0000000000000000,
351
1.57M
          0.2191868483885728f,
352
1.57M
      },
353
1.57M
      {
354
1.57M
          0.2500000000000000,
355
1.57M
          -0.1014005039375376f,
356
1.57M
          -0.1137907446044814f,
357
1.57M
          -0.1464291867126654f,
358
1.57M
          0.0000000000000000,
359
1.57M
          -0.0643507165794627f,
360
1.57M
          0.0829816309488214f,
361
1.57M
          0.2388977352334547f,
362
1.57M
          -0.3531238544981624f,
363
1.57M
          0.4082482904638630f,
364
1.57M
          -0.4826689115059858f,
365
1.57M
          -0.1741941265991621f,
366
1.57M
          -0.0476868035022928f,
367
1.57M
          0.1253805944856431f,
368
1.57M
          -0.4326608024727457f,
369
1.57M
          -0.2546827712406641f,
370
1.57M
      },
371
1.57M
      {
372
1.57M
          0.2500000000000000,
373
1.57M
          -0.1014005039375374f,
374
1.57M
          0.0000000000000000,
375
1.57M
          0.4251149611657548f,
376
1.57M
          0.0000000000000000,
377
1.57M
          -0.0643507165794626f,
378
1.57M
          -0.4517556589999480f,
379
1.57M
          0.0000000000000000,
380
1.57M
          -0.6035859033230976f,
381
1.57M
          0.0000000000000000,
382
1.57M
          0.0000000000000000,
383
1.57M
          0.0000000000000000,
384
1.57M
          -0.1426608480880724f,
385
1.57M
          -0.1381354035075845f,
386
1.57M
          0.3487520519930227f,
387
1.57M
          0.1135498731499429f,
388
1.57M
      },
389
1.57M
  };
390
391
1.57M
  const HWY_CAPPED(float, 16) d;
392
4.72M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.15M
    auto scalar = Zero(d);
394
53.5M
    for (size_t j = 0; j < 16; j++) {
395
50.4M
      auto px = Set(d, pixels[j]);
396
50.4M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
50.4M
      scalar = MulAdd(px, basis, scalar);
398
50.4M
    }
399
3.15M
    Store(scalar, d, coeffs + i);
400
3.15M
  }
401
1.57M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
72.3M
                            float* JXL_RESTRICT coefficients) {
411
72.3M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
72.3M
  size_t afv_x = afv_kind & 1;
413
72.3M
  size_t afv_y = afv_kind / 2;
414
72.3M
  HWY_ALIGN float block[4 * 8] = {};
415
361M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.44G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.15G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.15G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.15G
    }
420
289M
  }
421
  // AFV coefficients in (even, even) positions.
422
72.3M
  HWY_ALIGN float coeff[4 * 4];
423
72.3M
  AFVDCT4x4(block, coeff);
424
361M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.44G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.15G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.15G
    }
428
289M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
72.3M
  ComputeScaledDCT<4, 4>()(
431
72.3M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
72.3M
              pixels_stride),
433
72.3M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
361M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.60G
    for (size_t ix = 0; ix < 8; ix++) {
437
2.31G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.31G
    }
439
289M
  }
440
  // 4x8 DCT of the other half of the block.
441
72.3M
  ComputeScaledDCT<4, 8>()(
442
72.3M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
72.3M
      block, scratch_space);
444
361M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.60G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.31G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.31G
    }
448
289M
  }
449
72.3M
  float block00 = coefficients[0] * 0.25f;
450
72.3M
  float block01 = coefficients[1];
451
72.3M
  float block10 = coefficients[8];
452
72.3M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
72.3M
  coefficients[1] = (block00 - block01) * 0.5f;
454
72.3M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
72.3M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
295k
                            float* JXL_RESTRICT coefficients) {
411
295k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
295k
  size_t afv_x = afv_kind & 1;
413
295k
  size_t afv_y = afv_kind / 2;
414
295k
  HWY_ALIGN float block[4 * 8] = {};
415
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.91M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.73M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.73M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.73M
    }
420
1.18M
  }
421
  // AFV coefficients in (even, even) positions.
422
295k
  HWY_ALIGN float coeff[4 * 4];
423
295k
  AFVDCT4x4(block, coeff);
424
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.91M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.73M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.73M
    }
428
1.18M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
295k
  ComputeScaledDCT<4, 4>()(
431
295k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
295k
              pixels_stride),
433
295k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.46M
    }
439
1.18M
  }
440
  // 4x8 DCT of the other half of the block.
441
295k
  ComputeScaledDCT<4, 8>()(
442
295k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
295k
      block, scratch_space);
444
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.46M
    }
448
1.18M
  }
449
295k
  float block00 = coefficients[0] * 0.25f;
450
295k
  float block01 = coefficients[1];
451
295k
  float block10 = coefficients[8];
452
295k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
295k
  coefficients[1] = (block00 - block01) * 0.5f;
454
295k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
295k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
764k
                            float* JXL_RESTRICT coefficients) {
411
764k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
764k
  size_t afv_x = afv_kind & 1;
413
764k
  size_t afv_y = afv_kind / 2;
414
764k
  HWY_ALIGN float block[4 * 8] = {};
415
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
416
15.2M
    for (size_t ix = 0; ix < 4; ix++) {
417
12.2M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
12.2M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
12.2M
    }
420
3.05M
  }
421
  // AFV coefficients in (even, even) positions.
422
764k
  HWY_ALIGN float coeff[4 * 4];
423
764k
  AFVDCT4x4(block, coeff);
424
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
425
15.2M
    for (size_t ix = 0; ix < 4; ix++) {
426
12.2M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
12.2M
    }
428
3.05M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
764k
  ComputeScaledDCT<4, 4>()(
431
764k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
764k
              pixels_stride),
433
764k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
436
27.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
24.4M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
24.4M
    }
439
3.05M
  }
440
  // 4x8 DCT of the other half of the block.
441
764k
  ComputeScaledDCT<4, 8>()(
442
764k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
764k
      block, scratch_space);
444
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
445
27.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
24.4M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
24.4M
    }
448
3.05M
  }
449
764k
  float block00 = coefficients[0] * 0.25f;
450
764k
  float block01 = coefficients[1];
451
764k
  float block10 = coefficients[8];
452
764k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
764k
  coefficients[1] = (block00 - block01) * 0.5f;
454
764k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
764k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
201k
                            float* JXL_RESTRICT coefficients) {
411
201k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
201k
  size_t afv_x = afv_kind & 1;
413
201k
  size_t afv_y = afv_kind / 2;
414
201k
  HWY_ALIGN float block[4 * 8] = {};
415
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.03M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.23M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.23M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.23M
    }
420
807k
  }
421
  // AFV coefficients in (even, even) positions.
422
201k
  HWY_ALIGN float coeff[4 * 4];
423
201k
  AFVDCT4x4(block, coeff);
424
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.03M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.23M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.23M
    }
428
807k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
201k
  ComputeScaledDCT<4, 4>()(
431
201k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
201k
              pixels_stride),
433
201k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.27M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.46M
    }
439
807k
  }
440
  // 4x8 DCT of the other half of the block.
441
201k
  ComputeScaledDCT<4, 8>()(
442
201k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
201k
      block, scratch_space);
444
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.27M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.46M
    }
448
807k
  }
449
201k
  float block00 = coefficients[0] * 0.25f;
450
201k
  float block01 = coefficients[1];
451
201k
  float block10 = coefficients[8];
452
201k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
201k
  coefficients[1] = (block00 - block01) * 0.5f;
454
201k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
201k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
313k
                            float* JXL_RESTRICT coefficients) {
411
313k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
313k
  size_t afv_x = afv_kind & 1;
413
313k
  size_t afv_y = afv_kind / 2;
414
313k
  HWY_ALIGN float block[4 * 8] = {};
415
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.26M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.01M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.01M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.01M
    }
420
1.25M
  }
421
  // AFV coefficients in (even, even) positions.
422
313k
  HWY_ALIGN float coeff[4 * 4];
423
313k
  AFVDCT4x4(block, coeff);
424
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.26M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.01M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.01M
    }
428
1.25M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
313k
  ComputeScaledDCT<4, 4>()(
431
313k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
313k
              pixels_stride),
433
313k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
436
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
10.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
10.0M
    }
439
1.25M
  }
440
  // 4x8 DCT of the other half of the block.
441
313k
  ComputeScaledDCT<4, 8>()(
442
313k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
313k
      block, scratch_space);
444
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
10.0M
    }
448
1.25M
  }
449
313k
  float block00 = coefficients[0] * 0.25f;
450
313k
  float block01 = coefficients[1];
451
313k
  float block10 = coefficients[8];
452
313k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
313k
  coefficients[1] = (block00 - block01) * 0.5f;
454
313k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
313k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.2M
                            float* JXL_RESTRICT coefficients) {
411
17.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.2M
  size_t afv_x = afv_kind & 1;
413
17.2M
  size_t afv_y = afv_kind / 2;
414
17.2M
  HWY_ALIGN float block[4 * 8] = {};
415
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
345M
    for (size_t ix = 0; ix < 4; ix++) {
417
276M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
276M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
276M
    }
420
69.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.2M
  HWY_ALIGN float coeff[4 * 4];
423
17.2M
  AFVDCT4x4(block, coeff);
424
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
345M
    for (size_t ix = 0; ix < 4; ix++) {
426
276M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
276M
    }
428
69.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.2M
  ComputeScaledDCT<4, 4>()(
431
17.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.2M
              pixels_stride),
433
17.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
622M
    for (size_t ix = 0; ix < 8; ix++) {
437
553M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
553M
    }
439
69.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.2M
  ComputeScaledDCT<4, 8>()(
442
17.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.2M
      block, scratch_space);
444
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
622M
    for (size_t ix = 0; ix < 8; ix++) {
446
553M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
553M
    }
448
69.1M
  }
449
17.2M
  float block00 = coefficients[0] * 0.25f;
450
17.2M
  float block01 = coefficients[1];
451
17.2M
  float block10 = coefficients[8];
452
17.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.2M
                            float* JXL_RESTRICT coefficients) {
411
17.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.2M
  size_t afv_x = afv_kind & 1;
413
17.2M
  size_t afv_y = afv_kind / 2;
414
17.2M
  HWY_ALIGN float block[4 * 8] = {};
415
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
345M
    for (size_t ix = 0; ix < 4; ix++) {
417
276M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
276M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
276M
    }
420
69.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.2M
  HWY_ALIGN float coeff[4 * 4];
423
17.2M
  AFVDCT4x4(block, coeff);
424
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
345M
    for (size_t ix = 0; ix < 4; ix++) {
426
276M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
276M
    }
428
69.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.2M
  ComputeScaledDCT<4, 4>()(
431
17.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.2M
              pixels_stride),
433
17.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
622M
    for (size_t ix = 0; ix < 8; ix++) {
437
553M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
553M
    }
439
69.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.2M
  ComputeScaledDCT<4, 8>()(
442
17.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.2M
      block, scratch_space);
444
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
622M
    for (size_t ix = 0; ix < 8; ix++) {
446
553M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
553M
    }
448
69.1M
  }
449
17.2M
  float block00 = coefficients[0] * 0.25f;
450
17.2M
  float block01 = coefficients[1];
451
17.2M
  float block10 = coefficients[8];
452
17.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.2M
                            float* JXL_RESTRICT coefficients) {
411
17.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.2M
  size_t afv_x = afv_kind & 1;
413
17.2M
  size_t afv_y = afv_kind / 2;
414
17.2M
  HWY_ALIGN float block[4 * 8] = {};
415
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
345M
    for (size_t ix = 0; ix < 4; ix++) {
417
276M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
276M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
276M
    }
420
69.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.2M
  HWY_ALIGN float coeff[4 * 4];
423
17.2M
  AFVDCT4x4(block, coeff);
424
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
345M
    for (size_t ix = 0; ix < 4; ix++) {
426
276M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
276M
    }
428
69.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.2M
  ComputeScaledDCT<4, 4>()(
431
17.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.2M
              pixels_stride),
433
17.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
622M
    for (size_t ix = 0; ix < 8; ix++) {
437
553M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
553M
    }
439
69.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.2M
  ComputeScaledDCT<4, 8>()(
442
17.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.2M
      block, scratch_space);
444
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
622M
    for (size_t ix = 0; ix < 8; ix++) {
446
553M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
553M
    }
448
69.1M
  }
449
17.2M
  float block00 = coefficients[0] * 0.25f;
450
17.2M
  float block01 = coefficients[1];
451
17.2M
  float block10 = coefficients[8];
452
17.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.2M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
17.2M
                            float* JXL_RESTRICT coefficients) {
411
17.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
17.2M
  size_t afv_x = afv_kind & 1;
413
17.2M
  size_t afv_y = afv_kind / 2;
414
17.2M
  HWY_ALIGN float block[4 * 8] = {};
415
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
416
345M
    for (size_t ix = 0; ix < 4; ix++) {
417
276M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
276M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
276M
    }
420
69.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
17.2M
  HWY_ALIGN float coeff[4 * 4];
423
17.2M
  AFVDCT4x4(block, coeff);
424
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
425
345M
    for (size_t ix = 0; ix < 4; ix++) {
426
276M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
276M
    }
428
69.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
17.2M
  ComputeScaledDCT<4, 4>()(
431
17.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
17.2M
              pixels_stride),
433
17.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
436
622M
    for (size_t ix = 0; ix < 8; ix++) {
437
553M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
553M
    }
439
69.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
17.2M
  ComputeScaledDCT<4, 8>()(
442
17.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
17.2M
      block, scratch_space);
444
86.4M
  for (size_t iy = 0; iy < 4; iy++) {
445
622M
    for (size_t ix = 0; ix < 8; ix++) {
446
553M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
553M
    }
448
69.1M
  }
449
17.2M
  float block00 = coefficients[0] * 0.25f;
450
17.2M
  float block01 = coefficients[1];
451
17.2M
  float block10 = coefficients[8];
452
17.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
17.2M
  coefficients[1] = (block00 - block01) * 0.5f;
454
17.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
17.2M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
295k
                            float* JXL_RESTRICT coefficients) {
411
295k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
295k
  size_t afv_x = afv_kind & 1;
413
295k
  size_t afv_y = afv_kind / 2;
414
295k
  HWY_ALIGN float block[4 * 8] = {};
415
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.91M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.73M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.73M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.73M
    }
420
1.18M
  }
421
  // AFV coefficients in (even, even) positions.
422
295k
  HWY_ALIGN float coeff[4 * 4];
423
295k
  AFVDCT4x4(block, coeff);
424
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.91M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.73M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.73M
    }
428
1.18M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
295k
  ComputeScaledDCT<4, 4>()(
431
295k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
295k
              pixels_stride),
433
295k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.46M
    }
439
1.18M
  }
440
  // 4x8 DCT of the other half of the block.
441
295k
  ComputeScaledDCT<4, 8>()(
442
295k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
295k
      block, scratch_space);
444
1.47M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.6M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.46M
    }
448
1.18M
  }
449
295k
  float block00 = coefficients[0] * 0.25f;
450
295k
  float block01 = coefficients[1];
451
295k
  float block10 = coefficients[8];
452
295k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
295k
  coefficients[1] = (block00 - block01) * 0.5f;
454
295k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
295k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
764k
                            float* JXL_RESTRICT coefficients) {
411
764k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
764k
  size_t afv_x = afv_kind & 1;
413
764k
  size_t afv_y = afv_kind / 2;
414
764k
  HWY_ALIGN float block[4 * 8] = {};
415
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
416
15.2M
    for (size_t ix = 0; ix < 4; ix++) {
417
12.2M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
12.2M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
12.2M
    }
420
3.05M
  }
421
  // AFV coefficients in (even, even) positions.
422
764k
  HWY_ALIGN float coeff[4 * 4];
423
764k
  AFVDCT4x4(block, coeff);
424
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
425
15.2M
    for (size_t ix = 0; ix < 4; ix++) {
426
12.2M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
12.2M
    }
428
3.05M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
764k
  ComputeScaledDCT<4, 4>()(
431
764k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
764k
              pixels_stride),
433
764k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
436
27.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
24.4M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
24.4M
    }
439
3.05M
  }
440
  // 4x8 DCT of the other half of the block.
441
764k
  ComputeScaledDCT<4, 8>()(
442
764k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
764k
      block, scratch_space);
444
3.82M
  for (size_t iy = 0; iy < 4; iy++) {
445
27.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
24.4M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
24.4M
    }
448
3.05M
  }
449
764k
  float block00 = coefficients[0] * 0.25f;
450
764k
  float block01 = coefficients[1];
451
764k
  float block10 = coefficients[8];
452
764k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
764k
  coefficients[1] = (block00 - block01) * 0.5f;
454
764k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
764k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
201k
                            float* JXL_RESTRICT coefficients) {
411
201k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
201k
  size_t afv_x = afv_kind & 1;
413
201k
  size_t afv_y = afv_kind / 2;
414
201k
  HWY_ALIGN float block[4 * 8] = {};
415
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.03M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.23M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.23M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.23M
    }
420
807k
  }
421
  // AFV coefficients in (even, even) positions.
422
201k
  HWY_ALIGN float coeff[4 * 4];
423
201k
  AFVDCT4x4(block, coeff);
424
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.03M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.23M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.23M
    }
428
807k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
201k
  ComputeScaledDCT<4, 4>()(
431
201k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
201k
              pixels_stride),
433
201k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.27M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.46M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.46M
    }
439
807k
  }
440
  // 4x8 DCT of the other half of the block.
441
201k
  ComputeScaledDCT<4, 8>()(
442
201k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
201k
      block, scratch_space);
444
1.00M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.27M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.46M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.46M
    }
448
807k
  }
449
201k
  float block00 = coefficients[0] * 0.25f;
450
201k
  float block01 = coefficients[1];
451
201k
  float block10 = coefficients[8];
452
201k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
201k
  coefficients[1] = (block00 - block01) * 0.5f;
454
201k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
201k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
313k
                            float* JXL_RESTRICT coefficients) {
411
313k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
313k
  size_t afv_x = afv_kind & 1;
413
313k
  size_t afv_y = afv_kind / 2;
414
313k
  HWY_ALIGN float block[4 * 8] = {};
415
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.26M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.01M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.01M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.01M
    }
420
1.25M
  }
421
  // AFV coefficients in (even, even) positions.
422
313k
  HWY_ALIGN float coeff[4 * 4];
423
313k
  AFVDCT4x4(block, coeff);
424
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.26M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.01M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.01M
    }
428
1.25M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
313k
  ComputeScaledDCT<4, 4>()(
431
313k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
313k
              pixels_stride),
433
313k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
436
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
10.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
10.0M
    }
439
1.25M
  }
440
  // 4x8 DCT of the other half of the block.
441
313k
  ComputeScaledDCT<4, 8>()(
442
313k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
313k
      block, scratch_space);
444
1.56M
  for (size_t iy = 0; iy < 4; iy++) {
445
11.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
10.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
10.0M
    }
448
1.25M
  }
449
313k
  float block00 = coefficients[0] * 0.25f;
450
313k
  float block01 = coefficients[1];
451
313k
  float block10 = coefficients[8];
452
313k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
313k
  coefficients[1] = (block00 - block01) * 0.5f;
454
313k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
313k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
250M
                                          float* JXL_RESTRICT scratch_space) {
462
250M
  using Type = AcStrategyType;
463
250M
  switch (strategy) {
464
19.3M
    case Type::IDENTITY: {
465
57.9M
      for (size_t y = 0; y < 2; y++) {
466
115M
        for (size_t x = 0; x < 2; x++) {
467
77.2M
          float block_dc = 0;
468
386M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.54G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.23G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.23G
            }
472
308M
          }
473
77.2M
          block_dc *= 1.0f / 16;
474
386M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.54G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.23G
              if (ix == 1 && iy == 1) continue;
477
1.15G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.15G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.15G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.15G
            }
481
308M
          }
482
77.2M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
77.2M
          coefficients[y * 8 + x] = block_dc;
484
77.2M
        }
485
38.6M
      }
486
19.3M
      float block00 = coefficients[0];
487
19.3M
      float block01 = coefficients[1];
488
19.3M
      float block10 = coefficients[8];
489
19.3M
      float block11 = coefficients[9];
490
19.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
19.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
19.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
19.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
19.3M
      break;
495
0
    }
496
18.1M
    case Type::DCT8X4: {
497
54.3M
      for (size_t x = 0; x < 2; x++) {
498
36.2M
        HWY_ALIGN float block[4 * 8];
499
36.2M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
36.2M
                                 scratch_space);
501
181M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.30G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.15G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.15G
          }
506
144M
        }
507
36.2M
      }
508
18.1M
      float block0 = coefficients[0];
509
18.1M
      float block1 = coefficients[8];
510
18.1M
      coefficients[0] = (block0 + block1) * 0.5f;
511
18.1M
      coefficients[8] = (block0 - block1) * 0.5f;
512
18.1M
      break;
513
0
    }
514
17.8M
    case Type::DCT4X8: {
515
53.6M
      for (size_t y = 0; y < 2; y++) {
516
35.7M
        HWY_ALIGN float block[4 * 8];
517
35.7M
        ComputeScaledDCT<4, 8>()(
518
35.7M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
35.7M
            scratch_space);
520
178M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.28G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.14G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.14G
          }
524
143M
        }
525
35.7M
      }
526
17.8M
      float block0 = coefficients[0];
527
17.8M
      float block1 = coefficients[8];
528
17.8M
      coefficients[0] = (block0 + block1) * 0.5f;
529
17.8M
      coefficients[8] = (block0 - block1) * 0.5f;
530
17.8M
      break;
531
0
    }
532
17.3M
    case Type::DCT4X4: {
533
51.9M
      for (size_t y = 0; y < 2; y++) {
534
103M
        for (size_t x = 0; x < 2; x++) {
535
69.2M
          HWY_ALIGN float block[4 * 4];
536
69.2M
          ComputeScaledDCT<4, 4>()(
537
69.2M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
69.2M
              block, scratch_space);
539
346M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.10G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.10G
            }
543
276M
          }
544
69.2M
        }
545
34.6M
      }
546
17.3M
      float block00 = coefficients[0];
547
17.3M
      float block01 = coefficients[1];
548
17.3M
      float block10 = coefficients[8];
549
17.3M
      float block11 = coefficients[9];
550
17.3M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
17.3M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
17.3M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
17.3M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
17.3M
      break;
555
0
    }
556
19.3M
    case Type::DCT2X2: {
557
19.3M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
19.3M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
19.3M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
19.3M
      break;
561
0
    }
562
7.43M
    case Type::DCT16X16: {
563
7.43M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.43M
                                 scratch_space);
565
7.43M
      break;
566
0
    }
567
14.5M
    case Type::DCT16X8: {
568
14.5M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
14.5M
                                scratch_space);
570
14.5M
      break;
571
0
    }
572
14.8M
    case Type::DCT8X16: {
573
14.8M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
14.8M
                                scratch_space);
575
14.8M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.88M
    case Type::DCT32X16: {
588
2.88M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.88M
                                 scratch_space);
590
2.88M
      break;
591
0
    }
592
3.00M
    case Type::DCT16X32: {
593
3.00M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.00M
                                 scratch_space);
595
3.00M
      break;
596
0
    }
597
1.79M
    case Type::DCT32X32: {
598
1.79M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.79M
                                 scratch_space);
600
1.79M
      break;
601
0
    }
602
40.1M
    case Type::DCT: {
603
40.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
40.1M
                               scratch_space);
605
40.1M
      break;
606
0
    }
607
17.8M
    case Type::AFV0: {
608
17.8M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
17.8M
      break;
610
0
    }
611
18.8M
    case Type::AFV1: {
612
18.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
18.8M
      break;
614
0
    }
615
17.7M
    case Type::AFV2: {
616
17.7M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
17.7M
      break;
618
0
    }
619
17.9M
    case Type::AFV3: {
620
17.9M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
17.9M
      break;
622
0
    }
623
344k
    case Type::DCT64X64: {
624
344k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
344k
                                 scratch_space);
626
344k
      break;
627
0
    }
628
895k
    case Type::DCT64X32: {
629
895k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
895k
                                 scratch_space);
631
895k
      break;
632
0
    }
633
598k
    case Type::DCT32X64: {
634
598k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
598k
                                 scratch_space);
636
598k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
250M
  }
669
250M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
8.30M
                                          float* JXL_RESTRICT scratch_space) {
462
8.30M
  using Type = AcStrategyType;
463
8.30M
  switch (strategy) {
464
1.00M
    case Type::IDENTITY: {
465
3.01M
      for (size_t y = 0; y < 2; y++) {
466
6.03M
        for (size_t x = 0; x < 2; x++) {
467
4.02M
          float block_dc = 0;
468
20.1M
          for (size_t iy = 0; iy < 4; iy++) {
469
80.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
64.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
64.3M
            }
472
16.0M
          }
473
4.02M
          block_dc *= 1.0f / 16;
474
20.1M
          for (size_t iy = 0; iy < 4; iy++) {
475
80.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
64.3M
              if (ix == 1 && iy == 1) continue;
477
60.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
60.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
60.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
60.3M
            }
481
16.0M
          }
482
4.02M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.02M
          coefficients[y * 8 + x] = block_dc;
484
4.02M
        }
485
2.01M
      }
486
1.00M
      float block00 = coefficients[0];
487
1.00M
      float block01 = coefficients[1];
488
1.00M
      float block10 = coefficients[8];
489
1.00M
      float block11 = coefficients[9];
490
1.00M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.00M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.00M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.00M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.00M
      break;
495
0
    }
496
401k
    case Type::DCT8X4: {
497
1.20M
      for (size_t x = 0; x < 2; x++) {
498
802k
        HWY_ALIGN float block[4 * 8];
499
802k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
802k
                                 scratch_space);
501
4.01M
        for (size_t iy = 0; iy < 4; iy++) {
502
28.8M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
25.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
25.6M
          }
506
3.20M
        }
507
802k
      }
508
401k
      float block0 = coefficients[0];
509
401k
      float block1 = coefficients[8];
510
401k
      coefficients[0] = (block0 + block1) * 0.5f;
511
401k
      coefficients[8] = (block0 - block1) * 0.5f;
512
401k
      break;
513
0
    }
514
294k
    case Type::DCT4X8: {
515
883k
      for (size_t y = 0; y < 2; y++) {
516
588k
        HWY_ALIGN float block[4 * 8];
517
588k
        ComputeScaledDCT<4, 8>()(
518
588k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
588k
            scratch_space);
520
2.94M
        for (size_t iy = 0; iy < 4; iy++) {
521
21.2M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.8M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.8M
          }
524
2.35M
        }
525
588k
      }
526
294k
      float block0 = coefficients[0];
527
294k
      float block1 = coefficients[8];
528
294k
      coefficients[0] = (block0 + block1) * 0.5f;
529
294k
      coefficients[8] = (block0 - block1) * 0.5f;
530
294k
      break;
531
0
    }
532
2.38k
    case Type::DCT4X4: {
533
7.14k
      for (size_t y = 0; y < 2; y++) {
534
14.2k
        for (size_t x = 0; x < 2; x++) {
535
9.52k
          HWY_ALIGN float block[4 * 4];
536
9.52k
          ComputeScaledDCT<4, 4>()(
537
9.52k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.52k
              block, scratch_space);
539
47.6k
          for (size_t iy = 0; iy < 4; iy++) {
540
190k
            for (size_t ix = 0; ix < 4; ix++) {
541
152k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
152k
            }
543
38.1k
          }
544
9.52k
        }
545
4.76k
      }
546
2.38k
      float block00 = coefficients[0];
547
2.38k
      float block01 = coefficients[1];
548
2.38k
      float block10 = coefficients[8];
549
2.38k
      float block11 = coefficients[9];
550
2.38k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.38k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.38k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.38k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.38k
      break;
555
0
    }
556
1.04M
    case Type::DCT2X2: {
557
1.04M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.04M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.04M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.04M
      break;
561
0
    }
562
153k
    case Type::DCT16X16: {
563
153k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
153k
                                 scratch_space);
565
153k
      break;
566
0
    }
567
257k
    case Type::DCT16X8: {
568
257k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
257k
                                scratch_space);
570
257k
      break;
571
0
    }
572
382k
    case Type::DCT8X16: {
573
382k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
382k
                                scratch_space);
575
382k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
65.4k
    case Type::DCT32X16: {
588
65.4k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
65.4k
                                 scratch_space);
590
65.4k
      break;
591
0
    }
592
106k
    case Type::DCT16X32: {
593
106k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
106k
                                 scratch_space);
595
106k
      break;
596
0
    }
597
194k
    case Type::DCT32X32: {
598
194k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
194k
                                 scratch_space);
600
194k
      break;
601
0
    }
602
2.75M
    case Type::DCT: {
603
2.75M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
2.75M
                               scratch_space);
605
2.75M
      break;
606
0
    }
607
295k
    case Type::AFV0: {
608
295k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
295k
      break;
610
0
    }
611
764k
    case Type::AFV1: {
612
764k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
764k
      break;
614
0
    }
615
201k
    case Type::AFV2: {
616
201k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
201k
      break;
618
0
    }
619
313k
    case Type::AFV3: {
620
313k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
313k
      break;
622
0
    }
623
52.2k
    case Type::DCT64X64: {
624
52.2k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
52.2k
                                 scratch_space);
626
52.2k
      break;
627
0
    }
628
6.48k
    case Type::DCT64X32: {
629
6.48k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.48k
                                 scratch_space);
631
6.48k
      break;
632
0
    }
633
9.15k
    case Type::DCT32X64: {
634
9.15k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
9.15k
                                 scratch_space);
636
9.15k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
8.30M
  }
669
8.30M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
216M
                                          float* JXL_RESTRICT scratch_space) {
462
216M
  using Type = AcStrategyType;
463
216M
  switch (strategy) {
464
17.2M
    case Type::IDENTITY: {
465
51.8M
      for (size_t y = 0; y < 2; y++) {
466
103M
        for (size_t x = 0; x < 2; x++) {
467
69.1M
          float block_dc = 0;
468
345M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.10G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.10G
            }
472
276M
          }
473
69.1M
          block_dc *= 1.0f / 16;
474
345M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.10G
              if (ix == 1 && iy == 1) continue;
477
1.03G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.03G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.03G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.03G
            }
481
276M
          }
482
69.1M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
69.1M
          coefficients[y * 8 + x] = block_dc;
484
69.1M
        }
485
34.5M
      }
486
17.2M
      float block00 = coefficients[0];
487
17.2M
      float block01 = coefficients[1];
488
17.2M
      float block10 = coefficients[8];
489
17.2M
      float block11 = coefficients[9];
490
17.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
17.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
17.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
17.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
17.2M
      break;
495
0
    }
496
17.2M
    case Type::DCT8X4: {
497
51.8M
      for (size_t x = 0; x < 2; x++) {
498
34.5M
        HWY_ALIGN float block[4 * 8];
499
34.5M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
34.5M
                                 scratch_space);
501
172M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.24G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.10G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.10G
          }
506
138M
        }
507
34.5M
      }
508
17.2M
      float block0 = coefficients[0];
509
17.2M
      float block1 = coefficients[8];
510
17.2M
      coefficients[0] = (block0 + block1) * 0.5f;
511
17.2M
      coefficients[8] = (block0 - block1) * 0.5f;
512
17.2M
      break;
513
0
    }
514
17.2M
    case Type::DCT4X8: {
515
51.8M
      for (size_t y = 0; y < 2; y++) {
516
34.5M
        HWY_ALIGN float block[4 * 8];
517
34.5M
        ComputeScaledDCT<4, 8>()(
518
34.5M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
34.5M
            scratch_space);
520
172M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.24G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.10G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.10G
          }
524
138M
        }
525
34.5M
      }
526
17.2M
      float block0 = coefficients[0];
527
17.2M
      float block1 = coefficients[8];
528
17.2M
      coefficients[0] = (block0 + block1) * 0.5f;
529
17.2M
      coefficients[8] = (block0 - block1) * 0.5f;
530
17.2M
      break;
531
0
    }
532
17.2M
    case Type::DCT4X4: {
533
51.8M
      for (size_t y = 0; y < 2; y++) {
534
103M
        for (size_t x = 0; x < 2; x++) {
535
69.1M
          HWY_ALIGN float block[4 * 4];
536
69.1M
          ComputeScaledDCT<4, 4>()(
537
69.1M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
69.1M
              block, scratch_space);
539
345M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.38G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.10G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.10G
            }
543
276M
          }
544
69.1M
        }
545
34.5M
      }
546
17.2M
      float block00 = coefficients[0];
547
17.2M
      float block01 = coefficients[1];
548
17.2M
      float block10 = coefficients[8];
549
17.2M
      float block11 = coefficients[9];
550
17.2M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
17.2M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
17.2M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
17.2M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
17.2M
      break;
555
0
    }
556
17.2M
    case Type::DCT2X2: {
557
17.2M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
17.2M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
17.2M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
17.2M
      break;
561
0
    }
562
7.13M
    case Type::DCT16X16: {
563
7.13M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.13M
                                 scratch_space);
565
7.13M
      break;
566
0
    }
567
13.9M
    case Type::DCT16X8: {
568
13.9M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
13.9M
                                scratch_space);
570
13.9M
      break;
571
0
    }
572
14.0M
    case Type::DCT8X16: {
573
14.0M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
14.0M
                                scratch_space);
575
14.0M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
2.75M
    case Type::DCT32X16: {
588
2.75M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
2.75M
                                 scratch_space);
590
2.75M
      break;
591
0
    }
592
2.79M
    case Type::DCT16X32: {
593
2.79M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
2.79M
                                 scratch_space);
595
2.79M
      break;
596
0
    }
597
1.40M
    case Type::DCT32X32: {
598
1.40M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.40M
                                 scratch_space);
600
1.40M
      break;
601
0
    }
602
17.2M
    case Type::DCT: {
603
17.2M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
17.2M
                               scratch_space);
605
17.2M
      break;
606
0
    }
607
17.2M
    case Type::AFV0: {
608
17.2M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
17.2M
      break;
610
0
    }
611
17.2M
    case Type::AFV1: {
612
17.2M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
17.2M
      break;
614
0
    }
615
17.2M
    case Type::AFV2: {
616
17.2M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
17.2M
      break;
618
0
    }
619
17.2M
    case Type::AFV3: {
620
17.2M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
17.2M
      break;
622
0
    }
623
240k
    case Type::DCT64X64: {
624
240k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
240k
                                 scratch_space);
626
240k
      break;
627
0
    }
628
882k
    case Type::DCT64X32: {
629
882k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
882k
                                 scratch_space);
631
882k
      break;
632
0
    }
633
580k
    case Type::DCT32X64: {
634
580k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
580k
                                 scratch_space);
636
580k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
216M
  }
669
216M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
25.6M
                                          float* JXL_RESTRICT scratch_space) {
462
25.6M
  using Type = AcStrategyType;
463
25.6M
  switch (strategy) {
464
1.00M
    case Type::IDENTITY: {
465
3.01M
      for (size_t y = 0; y < 2; y++) {
466
6.03M
        for (size_t x = 0; x < 2; x++) {
467
4.02M
          float block_dc = 0;
468
20.1M
          for (size_t iy = 0; iy < 4; iy++) {
469
80.4M
            for (size_t ix = 0; ix < 4; ix++) {
470
64.3M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
64.3M
            }
472
16.0M
          }
473
4.02M
          block_dc *= 1.0f / 16;
474
20.1M
          for (size_t iy = 0; iy < 4; iy++) {
475
80.4M
            for (size_t ix = 0; ix < 4; ix++) {
476
64.3M
              if (ix == 1 && iy == 1) continue;
477
60.3M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
60.3M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
60.3M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
60.3M
            }
481
16.0M
          }
482
4.02M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.02M
          coefficients[y * 8 + x] = block_dc;
484
4.02M
        }
485
2.01M
      }
486
1.00M
      float block00 = coefficients[0];
487
1.00M
      float block01 = coefficients[1];
488
1.00M
      float block10 = coefficients[8];
489
1.00M
      float block11 = coefficients[9];
490
1.00M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.00M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.00M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.00M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.00M
      break;
495
0
    }
496
401k
    case Type::DCT8X4: {
497
1.20M
      for (size_t x = 0; x < 2; x++) {
498
802k
        HWY_ALIGN float block[4 * 8];
499
802k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
802k
                                 scratch_space);
501
4.01M
        for (size_t iy = 0; iy < 4; iy++) {
502
28.8M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
25.6M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
25.6M
          }
506
3.20M
        }
507
802k
      }
508
401k
      float block0 = coefficients[0];
509
401k
      float block1 = coefficients[8];
510
401k
      coefficients[0] = (block0 + block1) * 0.5f;
511
401k
      coefficients[8] = (block0 - block1) * 0.5f;
512
401k
      break;
513
0
    }
514
294k
    case Type::DCT4X8: {
515
883k
      for (size_t y = 0; y < 2; y++) {
516
588k
        HWY_ALIGN float block[4 * 8];
517
588k
        ComputeScaledDCT<4, 8>()(
518
588k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
588k
            scratch_space);
520
2.94M
        for (size_t iy = 0; iy < 4; iy++) {
521
21.2M
          for (size_t ix = 0; ix < 8; ix++) {
522
18.8M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
18.8M
          }
524
2.35M
        }
525
588k
      }
526
294k
      float block0 = coefficients[0];
527
294k
      float block1 = coefficients[8];
528
294k
      coefficients[0] = (block0 + block1) * 0.5f;
529
294k
      coefficients[8] = (block0 - block1) * 0.5f;
530
294k
      break;
531
0
    }
532
2.38k
    case Type::DCT4X4: {
533
7.14k
      for (size_t y = 0; y < 2; y++) {
534
14.2k
        for (size_t x = 0; x < 2; x++) {
535
9.52k
          HWY_ALIGN float block[4 * 4];
536
9.52k
          ComputeScaledDCT<4, 4>()(
537
9.52k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.52k
              block, scratch_space);
539
47.6k
          for (size_t iy = 0; iy < 4; iy++) {
540
190k
            for (size_t ix = 0; ix < 4; ix++) {
541
152k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
152k
            }
543
38.1k
          }
544
9.52k
        }
545
4.76k
      }
546
2.38k
      float block00 = coefficients[0];
547
2.38k
      float block01 = coefficients[1];
548
2.38k
      float block10 = coefficients[8];
549
2.38k
      float block11 = coefficients[9];
550
2.38k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.38k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.38k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.38k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.38k
      break;
555
0
    }
556
1.04M
    case Type::DCT2X2: {
557
1.04M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.04M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.04M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.04M
      break;
561
0
    }
562
153k
    case Type::DCT16X16: {
563
153k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
153k
                                 scratch_space);
565
153k
      break;
566
0
    }
567
257k
    case Type::DCT16X8: {
568
257k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
257k
                                scratch_space);
570
257k
      break;
571
0
    }
572
382k
    case Type::DCT8X16: {
573
382k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
382k
                                scratch_space);
575
382k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
65.4k
    case Type::DCT32X16: {
588
65.4k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
65.4k
                                 scratch_space);
590
65.4k
      break;
591
0
    }
592
106k
    case Type::DCT16X32: {
593
106k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
106k
                                 scratch_space);
595
106k
      break;
596
0
    }
597
194k
    case Type::DCT32X32: {
598
194k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
194k
                                 scratch_space);
600
194k
      break;
601
0
    }
602
20.0M
    case Type::DCT: {
603
20.0M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
20.0M
                               scratch_space);
605
20.0M
      break;
606
0
    }
607
295k
    case Type::AFV0: {
608
295k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
295k
      break;
610
0
    }
611
764k
    case Type::AFV1: {
612
764k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
764k
      break;
614
0
    }
615
201k
    case Type::AFV2: {
616
201k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
201k
      break;
618
0
    }
619
313k
    case Type::AFV3: {
620
313k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
313k
      break;
622
0
    }
623
52.2k
    case Type::DCT64X64: {
624
52.2k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
52.2k
                                 scratch_space);
626
52.2k
      break;
627
0
    }
628
6.48k
    case Type::DCT64X32: {
629
6.48k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
6.48k
                                 scratch_space);
631
6.48k
      break;
632
0
    }
633
9.15k
    case Type::DCT32X64: {
634
9.15k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
9.15k
                                 scratch_space);
636
9.15k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
25.6M
  }
669
25.6M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
33.9M
                                              float* scratch_space) {
676
33.9M
  using Type = AcStrategyType;
677
33.9M
  switch (strategy) {
678
514k
    case Type::DCT16X8: {
679
514k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
514k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
514k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
514k
      break;
683
0
    }
684
764k
    case Type::DCT8X16: {
685
764k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
764k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
764k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
764k
      break;
689
0
    }
690
306k
    case Type::DCT16X16: {
691
306k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
306k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
306k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
306k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
130k
    case Type::DCT32X16: {
709
130k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
130k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
130k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
130k
      break;
713
0
    }
714
212k
    case Type::DCT16X32: {
715
212k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
212k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
212k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
212k
      break;
719
0
    }
720
388k
    case Type::DCT32X32: {
721
388k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
388k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
388k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
388k
      break;
725
0
    }
726
12.9k
    case Type::DCT64X32: {
727
12.9k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
12.9k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
12.9k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
12.9k
      break;
731
0
    }
732
18.3k
    case Type::DCT32X64: {
733
18.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
18.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
18.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
18.3k
      break;
737
0
    }
738
104k
    case Type::DCT64X64: {
739
104k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
104k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
104k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
104k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
22.8M
    case Type::DCT:
787
24.9M
    case Type::DCT2X2:
788
24.9M
    case Type::DCT4X4:
789
25.4M
    case Type::DCT4X8:
790
26.3M
    case Type::DCT8X4:
791
26.8M
    case Type::AFV0:
792
28.4M
    case Type::AFV1:
793
28.8M
    case Type::AFV2:
794
29.4M
    case Type::AFV3:
795
31.4M
    case Type::IDENTITY:
796
31.4M
      dc[0] = block[0];
797
31.4M
      break;
798
33.9M
  }
799
33.9M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
8.30M
                                              float* scratch_space) {
676
8.30M
  using Type = AcStrategyType;
677
8.30M
  switch (strategy) {
678
257k
    case Type::DCT16X8: {
679
257k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
257k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
257k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
257k
      break;
683
0
    }
684
382k
    case Type::DCT8X16: {
685
382k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
382k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
382k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
382k
      break;
689
0
    }
690
153k
    case Type::DCT16X16: {
691
153k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
153k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
153k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
153k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
65.4k
    case Type::DCT32X16: {
709
65.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
65.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
65.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
65.4k
      break;
713
0
    }
714
106k
    case Type::DCT16X32: {
715
106k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
106k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
106k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
106k
      break;
719
0
    }
720
194k
    case Type::DCT32X32: {
721
194k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
194k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
194k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
194k
      break;
725
0
    }
726
6.48k
    case Type::DCT64X32: {
727
6.48k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.48k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.48k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.48k
      break;
731
0
    }
732
9.15k
    case Type::DCT32X64: {
733
9.15k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
9.15k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
9.15k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
9.15k
      break;
737
0
    }
738
52.2k
    case Type::DCT64X64: {
739
52.2k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
52.2k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
52.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
52.2k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
2.75M
    case Type::DCT:
787
3.80M
    case Type::DCT2X2:
788
3.80M
    case Type::DCT4X4:
789
4.09M
    case Type::DCT4X8:
790
4.50M
    case Type::DCT8X4:
791
4.79M
    case Type::AFV0:
792
5.56M
    case Type::AFV1:
793
5.76M
    case Type::AFV2:
794
6.07M
    case Type::AFV3:
795
7.08M
    case Type::IDENTITY:
796
7.08M
      dc[0] = block[0];
797
7.08M
      break;
798
8.30M
  }
799
8.30M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
25.6M
                                              float* scratch_space) {
676
25.6M
  using Type = AcStrategyType;
677
25.6M
  switch (strategy) {
678
257k
    case Type::DCT16X8: {
679
257k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
257k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
257k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
257k
      break;
683
0
    }
684
382k
    case Type::DCT8X16: {
685
382k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
382k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
382k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
382k
      break;
689
0
    }
690
153k
    case Type::DCT16X16: {
691
153k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
153k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
153k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
153k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
65.4k
    case Type::DCT32X16: {
709
65.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
65.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
65.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
65.4k
      break;
713
0
    }
714
106k
    case Type::DCT16X32: {
715
106k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
106k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
106k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
106k
      break;
719
0
    }
720
194k
    case Type::DCT32X32: {
721
194k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
194k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
194k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
194k
      break;
725
0
    }
726
6.48k
    case Type::DCT64X32: {
727
6.48k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
6.48k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
6.48k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
6.48k
      break;
731
0
    }
732
9.15k
    case Type::DCT32X64: {
733
9.15k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
9.15k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
9.15k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
9.15k
      break;
737
0
    }
738
52.2k
    case Type::DCT64X64: {
739
52.2k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
52.2k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
52.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
52.2k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
20.0M
    case Type::DCT:
787
21.1M
    case Type::DCT2X2:
788
21.1M
    case Type::DCT4X4:
789
21.3M
    case Type::DCT4X8:
790
21.8M
    case Type::DCT8X4:
791
22.0M
    case Type::AFV0:
792
22.8M
    case Type::AFV1:
793
23.0M
    case Type::AFV2:
794
23.3M
    case Type::AFV3:
795
24.3M
    case Type::IDENTITY:
796
24.3M
      dc[0] = block[0];
797
24.3M
      break;
798
25.6M
  }
799
25.6M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_