Coverage Report

Created: 2026-06-16 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.81M
                                   const size_t output_stride, float* scratch) {
40
2.81M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.81M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.81M
  float* block = scratch;
43
2.81M
  if (ROWS < COLS) {
44
2.59M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
5.84M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.39M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.39M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.39M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.39M
      }
50
1.45M
    }
51
1.67M
  } else {
52
6.25M
    for (size_t y = 0; y < LF_COLS; y++) {
53
24.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
20.2M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
20.2M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
20.2M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
20.2M
      }
58
4.58M
    }
59
1.67M
  }
60
61
2.81M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.81M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.81M
                                  scratch_space);
64
2.81M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
274k
                                   const size_t output_stride, float* scratch) {
40
274k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
274k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
274k
  float* block = scratch;
43
274k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
274k
  } else {
52
549k
    for (size_t y = 0; y < LF_COLS; y++) {
53
824k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
549k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
549k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
549k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
549k
      }
58
274k
    }
59
274k
  }
60
61
274k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
274k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
274k
                                  scratch_space);
64
274k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
436k
                                   const size_t output_stride, float* scratch) {
40
436k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
436k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
436k
  float* block = scratch;
43
436k
  if (ROWS < COLS) {
44
872k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.30M
      for (size_t x = 0; x < LF_COLS; x++) {
46
872k
        block[y * COLS + x] = input[y * input_stride + x] *
47
872k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
872k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
872k
      }
50
436k
    }
51
436k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
436k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
436k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
436k
                                  scratch_space);
64
436k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
166k
                                   const size_t output_stride, float* scratch) {
40
166k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
166k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
166k
  float* block = scratch;
43
166k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
166k
  } else {
52
499k
    for (size_t y = 0; y < LF_COLS; y++) {
53
998k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
665k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
665k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
665k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
665k
      }
58
332k
    }
59
166k
  }
60
61
166k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
166k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
166k
                                  scratch_space);
64
166k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
74.3k
                                   const size_t output_stride, float* scratch) {
40
74.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
74.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
74.3k
  float* block = scratch;
43
74.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
74.3k
  } else {
52
222k
    for (size_t y = 0; y < LF_COLS; y++) {
53
743k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
594k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
594k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
594k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
594k
      }
58
148k
    }
59
74.3k
  }
60
61
74.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
74.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
74.3k
                                  scratch_space);
64
74.3k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
124k
                                   const size_t output_stride, float* scratch) {
40
124k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
124k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
124k
  float* block = scratch;
43
124k
  if (ROWS < COLS) {
44
373k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.24M
      for (size_t x = 0; x < LF_COLS; x++) {
46
996k
        block[y * COLS + x] = input[y * input_stride + x] *
47
996k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
996k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
996k
      }
50
249k
    }
51
124k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
124k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
124k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
124k
                                  scratch_space);
64
124k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
248k
                                   const size_t output_stride, float* scratch) {
40
248k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
248k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
248k
  float* block = scratch;
43
248k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
248k
  } else {
52
1.24M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.96M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.96M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.96M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.96M
      }
58
992k
    }
59
248k
  }
60
61
248k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
248k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
248k
                                  scratch_space);
64
248k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.77k
                                   const size_t output_stride, float* scratch) {
40
7.77k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.77k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.77k
  float* block = scratch;
43
7.77k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
7.77k
  } else {
52
38.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
279k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
248k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
248k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
248k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
248k
      }
58
31.1k
    }
59
7.77k
  }
60
61
7.77k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.77k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.77k
                                  scratch_space);
64
7.77k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.2k
                                   const size_t output_stride, float* scratch) {
40
10.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.2k
  float* block = scratch;
43
10.2k
  if (ROWS < COLS) {
44
51.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
369k
      for (size_t x = 0; x < LF_COLS; x++) {
46
328k
        block[y * COLS + x] = input[y * input_stride + x] *
47
328k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
328k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
328k
      }
50
41.0k
    }
51
10.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.2k
                                  scratch_space);
64
10.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
64.1k
                                   const size_t output_stride, float* scratch) {
40
64.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
64.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
64.1k
  float* block = scratch;
43
64.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
64.1k
  } else {
52
576k
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.61M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
4.10M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
4.10M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
4.10M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
4.10M
      }
58
512k
    }
59
64.1k
  }
60
61
64.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
64.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
64.1k
                                  scratch_space);
64
64.1k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
274k
                                   const size_t output_stride, float* scratch) {
40
274k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
274k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
274k
  float* block = scratch;
43
274k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
274k
  } else {
52
549k
    for (size_t y = 0; y < LF_COLS; y++) {
53
824k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
549k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
549k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
549k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
549k
      }
58
274k
    }
59
274k
  }
60
61
274k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
274k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
274k
                                  scratch_space);
64
274k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
436k
                                   const size_t output_stride, float* scratch) {
40
436k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
436k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
436k
  float* block = scratch;
43
436k
  if (ROWS < COLS) {
44
872k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.30M
      for (size_t x = 0; x < LF_COLS; x++) {
46
872k
        block[y * COLS + x] = input[y * input_stride + x] *
47
872k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
872k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
872k
      }
50
436k
    }
51
436k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
436k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
436k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
436k
                                  scratch_space);
64
436k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
166k
                                   const size_t output_stride, float* scratch) {
40
166k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
166k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
166k
  float* block = scratch;
43
166k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
166k
  } else {
52
499k
    for (size_t y = 0; y < LF_COLS; y++) {
53
998k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
665k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
665k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
665k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
665k
      }
58
332k
    }
59
166k
  }
60
61
166k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
166k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
166k
                                  scratch_space);
64
166k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
74.3k
                                   const size_t output_stride, float* scratch) {
40
74.3k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
74.3k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
74.3k
  float* block = scratch;
43
74.3k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
74.3k
  } else {
52
222k
    for (size_t y = 0; y < LF_COLS; y++) {
53
743k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
594k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
594k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
594k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
594k
      }
58
148k
    }
59
74.3k
  }
60
61
74.3k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
74.3k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
74.3k
                                  scratch_space);
64
74.3k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
124k
                                   const size_t output_stride, float* scratch) {
40
124k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
124k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
124k
  float* block = scratch;
43
124k
  if (ROWS < COLS) {
44
373k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.24M
      for (size_t x = 0; x < LF_COLS; x++) {
46
996k
        block[y * COLS + x] = input[y * input_stride + x] *
47
996k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
996k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
996k
      }
50
249k
    }
51
124k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
124k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
124k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
124k
                                  scratch_space);
64
124k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
248k
                                   const size_t output_stride, float* scratch) {
40
248k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
248k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
248k
  float* block = scratch;
43
248k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
248k
  } else {
52
1.24M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.96M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.96M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.96M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.96M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.96M
      }
58
992k
    }
59
248k
  }
60
61
248k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
248k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
248k
                                  scratch_space);
64
248k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
7.77k
                                   const size_t output_stride, float* scratch) {
40
7.77k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
7.77k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
7.77k
  float* block = scratch;
43
7.77k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
7.77k
  } else {
52
38.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
279k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
248k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
248k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
248k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
248k
      }
58
31.1k
    }
59
7.77k
  }
60
61
7.77k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
7.77k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
7.77k
                                  scratch_space);
64
7.77k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
10.2k
                                   const size_t output_stride, float* scratch) {
40
10.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
10.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
10.2k
  float* block = scratch;
43
10.2k
  if (ROWS < COLS) {
44
51.3k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
369k
      for (size_t x = 0; x < LF_COLS; x++) {
46
328k
        block[y * COLS + x] = input[y * input_stride + x] *
47
328k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
328k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
328k
      }
50
41.0k
    }
51
10.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
10.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
10.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
10.2k
                                  scratch_space);
64
10.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
64.1k
                                   const size_t output_stride, float* scratch) {
40
64.1k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
64.1k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
64.1k
  float* block = scratch;
43
64.1k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
64.1k
  } else {
52
576k
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.61M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
4.10M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
4.10M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
4.10M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
4.10M
      }
58
512k
    }
59
64.1k
  }
60
61
64.1k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
64.1k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
64.1k
                                  scratch_space);
64
64.1k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
68.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
68.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
68.6M
  static_assert(S % 2 == 0, "S should be even");
70
68.6M
  float temp[kDCTBlockSize];
71
68.6M
  constexpr size_t num_2x2 = S / 2;
72
228M
  for (size_t y = 0; y < num_2x2; y++) {
73
640M
    for (size_t x = 0; x < num_2x2; x++) {
74
480M
      float c00 = block[y * 2 * stride + x * 2];
75
480M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
480M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
480M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
480M
      float r00 = c00 + c01 + c10 + c11;
79
480M
      float r01 = c00 + c01 - c10 - c11;
80
480M
      float r10 = c00 - c01 + c10 - c11;
81
480M
      float r11 = c00 - c01 - c10 + c11;
82
480M
      r00 *= 0.25f;
83
480M
      r01 *= 0.25f;
84
480M
      r10 *= 0.25f;
85
480M
      r11 *= 0.25f;
86
480M
      temp[y * kBlockDim + x] = r00;
87
480M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
480M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
480M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
480M
    }
91
160M
  }
92
388M
  for (size_t y = 0; y < S; y++) {
93
2.24G
    for (size_t x = 0; x < S; x++) {
94
1.92G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.92G
    }
96
320M
  }
97
68.6M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
5.82M
  for (size_t y = 0; y < num_2x2; y++) {
73
23.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.6M
      float c00 = block[y * 2 * stride + x * 2];
75
18.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.6M
      float r00 = c00 + c01 + c10 + c11;
79
18.6M
      float r01 = c00 + c01 - c10 - c11;
80
18.6M
      float r10 = c00 - c01 + c10 - c11;
81
18.6M
      float r11 = c00 - c01 - c10 + c11;
82
18.6M
      r00 *= 0.25f;
83
18.6M
      r01 *= 0.25f;
84
18.6M
      r10 *= 0.25f;
85
18.6M
      r11 *= 0.25f;
86
18.6M
      temp[y * kBlockDim + x] = r00;
87
18.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.6M
    }
91
4.66M
  }
92
10.4M
  for (size_t y = 0; y < S; y++) {
93
83.9M
    for (size_t x = 0; x < S; x++) {
94
74.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
74.6M
    }
96
9.32M
  }
97
1.16M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
3.49M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.99M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.66M
      float c00 = block[y * 2 * stride + x * 2];
75
4.66M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.66M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.66M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.66M
      float r00 = c00 + c01 + c10 + c11;
79
4.66M
      float r01 = c00 + c01 - c10 - c11;
80
4.66M
      float r10 = c00 - c01 + c10 - c11;
81
4.66M
      float r11 = c00 - c01 - c10 + c11;
82
4.66M
      r00 *= 0.25f;
83
4.66M
      r01 *= 0.25f;
84
4.66M
      r10 *= 0.25f;
85
4.66M
      r11 *= 0.25f;
86
4.66M
      temp[y * kBlockDim + x] = r00;
87
4.66M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.66M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.66M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.66M
    }
91
2.33M
  }
92
5.82M
  for (size_t y = 0; y < S; y++) {
93
23.3M
    for (size_t x = 0; x < S; x++) {
94
18.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.6M
    }
96
4.66M
  }
97
1.16M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
2.33M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.33M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.16M
      float c00 = block[y * 2 * stride + x * 2];
75
1.16M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.16M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.16M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.16M
      float r00 = c00 + c01 + c10 + c11;
79
1.16M
      float r01 = c00 + c01 - c10 - c11;
80
1.16M
      float r10 = c00 - c01 + c10 - c11;
81
1.16M
      float r11 = c00 - c01 - c10 + c11;
82
1.16M
      r00 *= 0.25f;
83
1.16M
      r01 *= 0.25f;
84
1.16M
      r10 *= 0.25f;
85
1.16M
      r11 *= 0.25f;
86
1.16M
      temp[y * kBlockDim + x] = r00;
87
1.16M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.16M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.16M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.16M
    }
91
1.16M
  }
92
3.49M
  for (size_t y = 0; y < S; y++) {
93
6.99M
    for (size_t x = 0; x < S; x++) {
94
4.66M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.66M
    }
96
2.33M
  }
97
1.16M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
20.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
20.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
20.5M
  static_assert(S % 2 == 0, "S should be even");
70
20.5M
  float temp[kDCTBlockSize];
71
20.5M
  constexpr size_t num_2x2 = S / 2;
72
102M
  for (size_t y = 0; y < num_2x2; y++) {
73
410M
    for (size_t x = 0; x < num_2x2; x++) {
74
328M
      float c00 = block[y * 2 * stride + x * 2];
75
328M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
328M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
328M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
328M
      float r00 = c00 + c01 + c10 + c11;
79
328M
      float r01 = c00 + c01 - c10 - c11;
80
328M
      float r10 = c00 - c01 + c10 - c11;
81
328M
      float r11 = c00 - c01 - c10 + c11;
82
328M
      r00 *= 0.25f;
83
328M
      r01 *= 0.25f;
84
328M
      r10 *= 0.25f;
85
328M
      r11 *= 0.25f;
86
328M
      temp[y * kBlockDim + x] = r00;
87
328M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
328M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
328M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
328M
    }
91
82.1M
  }
92
184M
  for (size_t y = 0; y < S; y++) {
93
1.47G
    for (size_t x = 0; x < S; x++) {
94
1.31G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.31G
    }
96
164M
  }
97
20.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
20.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
20.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
20.5M
  static_assert(S % 2 == 0, "S should be even");
70
20.5M
  float temp[kDCTBlockSize];
71
20.5M
  constexpr size_t num_2x2 = S / 2;
72
61.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
123M
    for (size_t x = 0; x < num_2x2; x++) {
74
82.1M
      float c00 = block[y * 2 * stride + x * 2];
75
82.1M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
82.1M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
82.1M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
82.1M
      float r00 = c00 + c01 + c10 + c11;
79
82.1M
      float r01 = c00 + c01 - c10 - c11;
80
82.1M
      float r10 = c00 - c01 + c10 - c11;
81
82.1M
      float r11 = c00 - c01 - c10 + c11;
82
82.1M
      r00 *= 0.25f;
83
82.1M
      r01 *= 0.25f;
84
82.1M
      r10 *= 0.25f;
85
82.1M
      r11 *= 0.25f;
86
82.1M
      temp[y * kBlockDim + x] = r00;
87
82.1M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
82.1M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
82.1M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
82.1M
    }
91
41.0M
  }
92
102M
  for (size_t y = 0; y < S; y++) {
93
410M
    for (size_t x = 0; x < S; x++) {
94
328M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
328M
    }
96
82.1M
  }
97
20.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
20.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
20.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
20.5M
  static_assert(S % 2 == 0, "S should be even");
70
20.5M
  float temp[kDCTBlockSize];
71
20.5M
  constexpr size_t num_2x2 = S / 2;
72
41.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
41.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
20.5M
      float c00 = block[y * 2 * stride + x * 2];
75
20.5M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
20.5M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
20.5M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
20.5M
      float r00 = c00 + c01 + c10 + c11;
79
20.5M
      float r01 = c00 + c01 - c10 - c11;
80
20.5M
      float r10 = c00 - c01 + c10 - c11;
81
20.5M
      float r11 = c00 - c01 - c10 + c11;
82
20.5M
      r00 *= 0.25f;
83
20.5M
      r01 *= 0.25f;
84
20.5M
      r10 *= 0.25f;
85
20.5M
      r11 *= 0.25f;
86
20.5M
      temp[y * kBlockDim + x] = r00;
87
20.5M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
20.5M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
20.5M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
20.5M
    }
91
20.5M
  }
92
61.6M
  for (size_t y = 0; y < S; y++) {
93
123M
    for (size_t x = 0; x < S; x++) {
94
82.1M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
82.1M
    }
96
41.0M
  }
97
20.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
5.82M
  for (size_t y = 0; y < num_2x2; y++) {
73
23.3M
    for (size_t x = 0; x < num_2x2; x++) {
74
18.6M
      float c00 = block[y * 2 * stride + x * 2];
75
18.6M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
18.6M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
18.6M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
18.6M
      float r00 = c00 + c01 + c10 + c11;
79
18.6M
      float r01 = c00 + c01 - c10 - c11;
80
18.6M
      float r10 = c00 - c01 + c10 - c11;
81
18.6M
      float r11 = c00 - c01 - c10 + c11;
82
18.6M
      r00 *= 0.25f;
83
18.6M
      r01 *= 0.25f;
84
18.6M
      r10 *= 0.25f;
85
18.6M
      r11 *= 0.25f;
86
18.6M
      temp[y * kBlockDim + x] = r00;
87
18.6M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
18.6M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
18.6M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
18.6M
    }
91
4.66M
  }
92
10.4M
  for (size_t y = 0; y < S; y++) {
93
83.9M
    for (size_t x = 0; x < S; x++) {
94
74.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
74.6M
    }
96
9.32M
  }
97
1.16M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
3.49M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.99M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.66M
      float c00 = block[y * 2 * stride + x * 2];
75
4.66M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.66M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.66M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.66M
      float r00 = c00 + c01 + c10 + c11;
79
4.66M
      float r01 = c00 + c01 - c10 - c11;
80
4.66M
      float r10 = c00 - c01 + c10 - c11;
81
4.66M
      float r11 = c00 - c01 - c10 + c11;
82
4.66M
      r00 *= 0.25f;
83
4.66M
      r01 *= 0.25f;
84
4.66M
      r10 *= 0.25f;
85
4.66M
      r11 *= 0.25f;
86
4.66M
      temp[y * kBlockDim + x] = r00;
87
4.66M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.66M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.66M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.66M
    }
91
2.33M
  }
92
5.82M
  for (size_t y = 0; y < S; y++) {
93
23.3M
    for (size_t x = 0; x < S; x++) {
94
18.6M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
18.6M
    }
96
4.66M
  }
97
1.16M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.16M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.16M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.16M
  static_assert(S % 2 == 0, "S should be even");
70
1.16M
  float temp[kDCTBlockSize];
71
1.16M
  constexpr size_t num_2x2 = S / 2;
72
2.33M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.33M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.16M
      float c00 = block[y * 2 * stride + x * 2];
75
1.16M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.16M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.16M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.16M
      float r00 = c00 + c01 + c10 + c11;
79
1.16M
      float r01 = c00 + c01 - c10 - c11;
80
1.16M
      float r10 = c00 - c01 + c10 - c11;
81
1.16M
      float r11 = c00 - c01 - c10 + c11;
82
1.16M
      r00 *= 0.25f;
83
1.16M
      r01 *= 0.25f;
84
1.16M
      r10 *= 0.25f;
85
1.16M
      r11 *= 0.25f;
86
1.16M
      temp[y * kBlockDim + x] = r00;
87
1.16M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.16M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.16M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.16M
    }
91
1.16M
  }
92
3.49M
  for (size_t y = 0; y < S; y++) {
93
6.99M
    for (size_t x = 0; x < S; x++) {
94
4.66M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.66M
    }
96
2.33M
  }
97
1.16M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
85.9M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
85.9M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
85.9M
      {
102
85.9M
          0.2500000000000000,
103
85.9M
          0.8769029297991420f,
104
85.9M
          0.0000000000000000,
105
85.9M
          0.0000000000000000,
106
85.9M
          0.0000000000000000,
107
85.9M
          -0.4105377591765233f,
108
85.9M
          0.0000000000000000,
109
85.9M
          0.0000000000000000,
110
85.9M
          0.0000000000000000,
111
85.9M
          0.0000000000000000,
112
85.9M
          0.0000000000000000,
113
85.9M
          0.0000000000000000,
114
85.9M
          0.0000000000000000,
115
85.9M
          0.0000000000000000,
116
85.9M
          0.0000000000000000,
117
85.9M
          0.0000000000000000,
118
85.9M
      },
119
85.9M
      {
120
85.9M
          0.2500000000000000,
121
85.9M
          0.2206518106944235f,
122
85.9M
          0.0000000000000000,
123
85.9M
          0.0000000000000000,
124
85.9M
          -0.7071067811865474f,
125
85.9M
          0.6235485373547691f,
126
85.9M
          0.0000000000000000,
127
85.9M
          0.0000000000000000,
128
85.9M
          0.0000000000000000,
129
85.9M
          0.0000000000000000,
130
85.9M
          0.0000000000000000,
131
85.9M
          0.0000000000000000,
132
85.9M
          0.0000000000000000,
133
85.9M
          0.0000000000000000,
134
85.9M
          0.0000000000000000,
135
85.9M
          0.0000000000000000,
136
85.9M
      },
137
85.9M
      {
138
85.9M
          0.2500000000000000,
139
85.9M
          -0.1014005039375376f,
140
85.9M
          0.4067007583026075f,
141
85.9M
          -0.2125574805828875f,
142
85.9M
          0.0000000000000000,
143
85.9M
          -0.0643507165794627f,
144
85.9M
          -0.4517556589999482f,
145
85.9M
          -0.3046847507248690f,
146
85.9M
          0.3017929516615495f,
147
85.9M
          0.4082482904638627f,
148
85.9M
          0.1747866975480809f,
149
85.9M
          -0.2110560104933578f,
150
85.9M
          -0.1426608480880726f,
151
85.9M
          -0.1381354035075859f,
152
85.9M
          -0.1743760259965107f,
153
85.9M
          0.1135498731499434f,
154
85.9M
      },
155
85.9M
      {
156
85.9M
          0.2500000000000000,
157
85.9M
          -0.1014005039375375f,
158
85.9M
          0.4444481661973445f,
159
85.9M
          0.3085497062849767f,
160
85.9M
          0.0000000000000000f,
161
85.9M
          -0.0643507165794627f,
162
85.9M
          0.1585450355184006f,
163
85.9M
          0.5112616136591823f,
164
85.9M
          0.2579236279634118f,
165
85.9M
          0.0000000000000000,
166
85.9M
          0.0812611176717539f,
167
85.9M
          0.1856718091610980f,
168
85.9M
          -0.3416446842253372f,
169
85.9M
          0.3302282550303788f,
170
85.9M
          0.0702790691196284f,
171
85.9M
          -0.0741750459581035f,
172
85.9M
      },
173
85.9M
      {
174
85.9M
          0.2500000000000000,
175
85.9M
          0.2206518106944236f,
176
85.9M
          0.0000000000000000,
177
85.9M
          0.0000000000000000,
178
85.9M
          0.7071067811865476f,
179
85.9M
          0.6235485373547694f,
180
85.9M
          0.0000000000000000,
181
85.9M
          0.0000000000000000,
182
85.9M
          0.0000000000000000,
183
85.9M
          0.0000000000000000,
184
85.9M
          0.0000000000000000,
185
85.9M
          0.0000000000000000,
186
85.9M
          0.0000000000000000,
187
85.9M
          0.0000000000000000,
188
85.9M
          0.0000000000000000,
189
85.9M
          0.0000000000000000,
190
85.9M
      },
191
85.9M
      {
192
85.9M
          0.2500000000000000,
193
85.9M
          -0.1014005039375378f,
194
85.9M
          0.0000000000000000,
195
85.9M
          0.4706702258572536f,
196
85.9M
          0.0000000000000000,
197
85.9M
          -0.0643507165794628f,
198
85.9M
          -0.0403851516082220f,
199
85.9M
          0.0000000000000000,
200
85.9M
          0.1627234014286620f,
201
85.9M
          0.0000000000000000,
202
85.9M
          0.0000000000000000,
203
85.9M
          0.0000000000000000,
204
85.9M
          0.7367497537172237f,
205
85.9M
          0.0875511500058708f,
206
85.9M
          -0.2921026642334881f,
207
85.9M
          0.1940289303259434f,
208
85.9M
      },
209
85.9M
      {
210
85.9M
          0.2500000000000000,
211
85.9M
          -0.1014005039375377f,
212
85.9M
          0.1957439937204294f,
213
85.9M
          -0.1621205195722993f,
214
85.9M
          0.0000000000000000,
215
85.9M
          -0.0643507165794628f,
216
85.9M
          0.0074182263792424f,
217
85.9M
          -0.2904801297289980f,
218
85.9M
          0.0952002265347504f,
219
85.9M
          0.0000000000000000,
220
85.9M
          -0.3675398009862027f,
221
85.9M
          0.4921585901373873f,
222
85.9M
          0.2462710772207515f,
223
85.9M
          -0.0794670660590957f,
224
85.9M
          0.3623817333531167f,
225
85.9M
          -0.4351904965232280f,
226
85.9M
      },
227
85.9M
      {
228
85.9M
          0.2500000000000000,
229
85.9M
          -0.1014005039375376f,
230
85.9M
          0.2929100136981264f,
231
85.9M
          0.0000000000000000,
232
85.9M
          0.0000000000000000,
233
85.9M
          -0.0643507165794627f,
234
85.9M
          0.3935103426921017f,
235
85.9M
          -0.0657870154914280f,
236
85.9M
          0.0000000000000000,
237
85.9M
          -0.4082482904638628f,
238
85.9M
          -0.3078822139579090f,
239
85.9M
          -0.3852501370925192f,
240
85.9M
          -0.0857401903551931f,
241
85.9M
          -0.4613374887461511f,
242
85.9M
          0.0000000000000000,
243
85.9M
          0.2191868483885747f,
244
85.9M
      },
245
85.9M
      {
246
85.9M
          0.2500000000000000,
247
85.9M
          -0.1014005039375376f,
248
85.9M
          -0.4067007583026072f,
249
85.9M
          -0.2125574805828705f,
250
85.9M
          0.0000000000000000,
251
85.9M
          -0.0643507165794627f,
252
85.9M
          -0.4517556589999464f,
253
85.9M
          0.3046847507248840f,
254
85.9M
          0.3017929516615503f,
255
85.9M
          -0.4082482904638635f,
256
85.9M
          -0.1747866975480813f,
257
85.9M
          0.2110560104933581f,
258
85.9M
          -0.1426608480880734f,
259
85.9M
          -0.1381354035075829f,
260
85.9M
          -0.1743760259965108f,
261
85.9M
          0.1135498731499426f,
262
85.9M
      },
263
85.9M
      {
264
85.9M
          0.2500000000000000,
265
85.9M
          -0.1014005039375377f,
266
85.9M
          -0.1957439937204287f,
267
85.9M
          -0.1621205195722833f,
268
85.9M
          0.0000000000000000,
269
85.9M
          -0.0643507165794628f,
270
85.9M
          0.0074182263792444f,
271
85.9M
          0.2904801297290076f,
272
85.9M
          0.0952002265347505f,
273
85.9M
          0.0000000000000000,
274
85.9M
          0.3675398009862011f,
275
85.9M
          -0.4921585901373891f,
276
85.9M
          0.2462710772207514f,
277
85.9M
          -0.0794670660591026f,
278
85.9M
          0.3623817333531165f,
279
85.9M
          -0.4351904965232251f,
280
85.9M
      },
281
85.9M
      {
282
85.9M
          0.2500000000000000,
283
85.9M
          -0.1014005039375375f,
284
85.9M
          0.0000000000000000,
285
85.9M
          -0.4706702258572528f,
286
85.9M
          0.0000000000000000,
287
85.9M
          -0.0643507165794627f,
288
85.9M
          0.1107416575309343f,
289
85.9M
          0.0000000000000000,
290
85.9M
          -0.1627234014286617f,
291
85.9M
          0.0000000000000000,
292
85.9M
          0.0000000000000000,
293
85.9M
          0.0000000000000000,
294
85.9M
          0.1488339922711357f,
295
85.9M
          0.4972464710953509f,
296
85.9M
          0.2921026642334879f,
297
85.9M
          0.5550443808910661f,
298
85.9M
      },
299
85.9M
      {
300
85.9M
          0.2500000000000000,
301
85.9M
          -0.1014005039375377f,
302
85.9M
          0.1137907446044809f,
303
85.9M
          -0.1464291867126764f,
304
85.9M
          0.0000000000000000,
305
85.9M
          -0.0643507165794628f,
306
85.9M
          0.0829816309488205f,
307
85.9M
          -0.2388977352334460f,
308
85.9M
          -0.3531238544981630f,
309
85.9M
          -0.4082482904638630f,
310
85.9M
          0.4826689115059883f,
311
85.9M
          0.1741941265991622f,
312
85.9M
          -0.0476868035022925f,
313
85.9M
          0.1253805944856366f,
314
85.9M
          -0.4326608024727445f,
315
85.9M
          -0.2546827712406646f,
316
85.9M
      },
317
85.9M
      {
318
85.9M
          0.2500000000000000,
319
85.9M
          -0.1014005039375377f,
320
85.9M
          -0.4444481661973438f,
321
85.9M
          0.3085497062849487f,
322
85.9M
          0.0000000000000000,
323
85.9M
          -0.0643507165794628f,
324
85.9M
          0.1585450355183970f,
325
85.9M
          -0.5112616136592012f,
326
85.9M
          0.2579236279634129f,
327
85.9M
          0.0000000000000000,
328
85.9M
          -0.0812611176717504f,
329
85.9M
          -0.1856718091610990f,
330
85.9M
          -0.3416446842253373f,
331
85.9M
          0.3302282550303805f,
332
85.9M
          0.0702790691196282f,
333
85.9M
          -0.0741750459581023f,
334
85.9M
      },
335
85.9M
      {
336
85.9M
          0.2500000000000000,
337
85.9M
          -0.1014005039375376f,
338
85.9M
          -0.2929100136981264f,
339
85.9M
          0.0000000000000000,
340
85.9M
          0.0000000000000000,
341
85.9M
          -0.0643507165794627f,
342
85.9M
          0.3935103426921022f,
343
85.9M
          0.0657870154914254f,
344
85.9M
          0.0000000000000000,
345
85.9M
          0.4082482904638634f,
346
85.9M
          0.3078822139579031f,
347
85.9M
          0.3852501370925211f,
348
85.9M
          -0.0857401903551927f,
349
85.9M
          -0.4613374887461554f,
350
85.9M
          0.0000000000000000,
351
85.9M
          0.2191868483885728f,
352
85.9M
      },
353
85.9M
      {
354
85.9M
          0.2500000000000000,
355
85.9M
          -0.1014005039375376f,
356
85.9M
          -0.1137907446044814f,
357
85.9M
          -0.1464291867126654f,
358
85.9M
          0.0000000000000000,
359
85.9M
          -0.0643507165794627f,
360
85.9M
          0.0829816309488214f,
361
85.9M
          0.2388977352334547f,
362
85.9M
          -0.3531238544981624f,
363
85.9M
          0.4082482904638630f,
364
85.9M
          -0.4826689115059858f,
365
85.9M
          -0.1741941265991621f,
366
85.9M
          -0.0476868035022928f,
367
85.9M
          0.1253805944856431f,
368
85.9M
          -0.4326608024727457f,
369
85.9M
          -0.2546827712406641f,
370
85.9M
      },
371
85.9M
      {
372
85.9M
          0.2500000000000000,
373
85.9M
          -0.1014005039375374f,
374
85.9M
          0.0000000000000000,
375
85.9M
          0.4251149611657548f,
376
85.9M
          0.0000000000000000,
377
85.9M
          -0.0643507165794626f,
378
85.9M
          -0.4517556589999480f,
379
85.9M
          0.0000000000000000,
380
85.9M
          -0.6035859033230976f,
381
85.9M
          0.0000000000000000,
382
85.9M
          0.0000000000000000,
383
85.9M
          0.0000000000000000,
384
85.9M
          -0.1426608480880724f,
385
85.9M
          -0.1381354035075845f,
386
85.9M
          0.3487520519930227f,
387
85.9M
          0.1135498731499429f,
388
85.9M
      },
389
85.9M
  };
390
391
85.9M
  const HWY_CAPPED(float, 16) d;
392
257M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
171M
    auto scalar = Zero(d);
394
2.92G
    for (size_t j = 0; j < 16; j++) {
395
2.75G
      auto px = Set(d, pixels[j]);
396
2.75G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.75G
      scalar = MulAdd(px, basis, scalar);
398
2.75G
    }
399
171M
    Store(scalar, d, coeffs + i);
400
171M
  }
401
85.9M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.92M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.92M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.92M
      {
102
1.92M
          0.2500000000000000,
103
1.92M
          0.8769029297991420f,
104
1.92M
          0.0000000000000000,
105
1.92M
          0.0000000000000000,
106
1.92M
          0.0000000000000000,
107
1.92M
          -0.4105377591765233f,
108
1.92M
          0.0000000000000000,
109
1.92M
          0.0000000000000000,
110
1.92M
          0.0000000000000000,
111
1.92M
          0.0000000000000000,
112
1.92M
          0.0000000000000000,
113
1.92M
          0.0000000000000000,
114
1.92M
          0.0000000000000000,
115
1.92M
          0.0000000000000000,
116
1.92M
          0.0000000000000000,
117
1.92M
          0.0000000000000000,
118
1.92M
      },
119
1.92M
      {
120
1.92M
          0.2500000000000000,
121
1.92M
          0.2206518106944235f,
122
1.92M
          0.0000000000000000,
123
1.92M
          0.0000000000000000,
124
1.92M
          -0.7071067811865474f,
125
1.92M
          0.6235485373547691f,
126
1.92M
          0.0000000000000000,
127
1.92M
          0.0000000000000000,
128
1.92M
          0.0000000000000000,
129
1.92M
          0.0000000000000000,
130
1.92M
          0.0000000000000000,
131
1.92M
          0.0000000000000000,
132
1.92M
          0.0000000000000000,
133
1.92M
          0.0000000000000000,
134
1.92M
          0.0000000000000000,
135
1.92M
          0.0000000000000000,
136
1.92M
      },
137
1.92M
      {
138
1.92M
          0.2500000000000000,
139
1.92M
          -0.1014005039375376f,
140
1.92M
          0.4067007583026075f,
141
1.92M
          -0.2125574805828875f,
142
1.92M
          0.0000000000000000,
143
1.92M
          -0.0643507165794627f,
144
1.92M
          -0.4517556589999482f,
145
1.92M
          -0.3046847507248690f,
146
1.92M
          0.3017929516615495f,
147
1.92M
          0.4082482904638627f,
148
1.92M
          0.1747866975480809f,
149
1.92M
          -0.2110560104933578f,
150
1.92M
          -0.1426608480880726f,
151
1.92M
          -0.1381354035075859f,
152
1.92M
          -0.1743760259965107f,
153
1.92M
          0.1135498731499434f,
154
1.92M
      },
155
1.92M
      {
156
1.92M
          0.2500000000000000,
157
1.92M
          -0.1014005039375375f,
158
1.92M
          0.4444481661973445f,
159
1.92M
          0.3085497062849767f,
160
1.92M
          0.0000000000000000f,
161
1.92M
          -0.0643507165794627f,
162
1.92M
          0.1585450355184006f,
163
1.92M
          0.5112616136591823f,
164
1.92M
          0.2579236279634118f,
165
1.92M
          0.0000000000000000,
166
1.92M
          0.0812611176717539f,
167
1.92M
          0.1856718091610980f,
168
1.92M
          -0.3416446842253372f,
169
1.92M
          0.3302282550303788f,
170
1.92M
          0.0702790691196284f,
171
1.92M
          -0.0741750459581035f,
172
1.92M
      },
173
1.92M
      {
174
1.92M
          0.2500000000000000,
175
1.92M
          0.2206518106944236f,
176
1.92M
          0.0000000000000000,
177
1.92M
          0.0000000000000000,
178
1.92M
          0.7071067811865476f,
179
1.92M
          0.6235485373547694f,
180
1.92M
          0.0000000000000000,
181
1.92M
          0.0000000000000000,
182
1.92M
          0.0000000000000000,
183
1.92M
          0.0000000000000000,
184
1.92M
          0.0000000000000000,
185
1.92M
          0.0000000000000000,
186
1.92M
          0.0000000000000000,
187
1.92M
          0.0000000000000000,
188
1.92M
          0.0000000000000000,
189
1.92M
          0.0000000000000000,
190
1.92M
      },
191
1.92M
      {
192
1.92M
          0.2500000000000000,
193
1.92M
          -0.1014005039375378f,
194
1.92M
          0.0000000000000000,
195
1.92M
          0.4706702258572536f,
196
1.92M
          0.0000000000000000,
197
1.92M
          -0.0643507165794628f,
198
1.92M
          -0.0403851516082220f,
199
1.92M
          0.0000000000000000,
200
1.92M
          0.1627234014286620f,
201
1.92M
          0.0000000000000000,
202
1.92M
          0.0000000000000000,
203
1.92M
          0.0000000000000000,
204
1.92M
          0.7367497537172237f,
205
1.92M
          0.0875511500058708f,
206
1.92M
          -0.2921026642334881f,
207
1.92M
          0.1940289303259434f,
208
1.92M
      },
209
1.92M
      {
210
1.92M
          0.2500000000000000,
211
1.92M
          -0.1014005039375377f,
212
1.92M
          0.1957439937204294f,
213
1.92M
          -0.1621205195722993f,
214
1.92M
          0.0000000000000000,
215
1.92M
          -0.0643507165794628f,
216
1.92M
          0.0074182263792424f,
217
1.92M
          -0.2904801297289980f,
218
1.92M
          0.0952002265347504f,
219
1.92M
          0.0000000000000000,
220
1.92M
          -0.3675398009862027f,
221
1.92M
          0.4921585901373873f,
222
1.92M
          0.2462710772207515f,
223
1.92M
          -0.0794670660590957f,
224
1.92M
          0.3623817333531167f,
225
1.92M
          -0.4351904965232280f,
226
1.92M
      },
227
1.92M
      {
228
1.92M
          0.2500000000000000,
229
1.92M
          -0.1014005039375376f,
230
1.92M
          0.2929100136981264f,
231
1.92M
          0.0000000000000000,
232
1.92M
          0.0000000000000000,
233
1.92M
          -0.0643507165794627f,
234
1.92M
          0.3935103426921017f,
235
1.92M
          -0.0657870154914280f,
236
1.92M
          0.0000000000000000,
237
1.92M
          -0.4082482904638628f,
238
1.92M
          -0.3078822139579090f,
239
1.92M
          -0.3852501370925192f,
240
1.92M
          -0.0857401903551931f,
241
1.92M
          -0.4613374887461511f,
242
1.92M
          0.0000000000000000,
243
1.92M
          0.2191868483885747f,
244
1.92M
      },
245
1.92M
      {
246
1.92M
          0.2500000000000000,
247
1.92M
          -0.1014005039375376f,
248
1.92M
          -0.4067007583026072f,
249
1.92M
          -0.2125574805828705f,
250
1.92M
          0.0000000000000000,
251
1.92M
          -0.0643507165794627f,
252
1.92M
          -0.4517556589999464f,
253
1.92M
          0.3046847507248840f,
254
1.92M
          0.3017929516615503f,
255
1.92M
          -0.4082482904638635f,
256
1.92M
          -0.1747866975480813f,
257
1.92M
          0.2110560104933581f,
258
1.92M
          -0.1426608480880734f,
259
1.92M
          -0.1381354035075829f,
260
1.92M
          -0.1743760259965108f,
261
1.92M
          0.1135498731499426f,
262
1.92M
      },
263
1.92M
      {
264
1.92M
          0.2500000000000000,
265
1.92M
          -0.1014005039375377f,
266
1.92M
          -0.1957439937204287f,
267
1.92M
          -0.1621205195722833f,
268
1.92M
          0.0000000000000000,
269
1.92M
          -0.0643507165794628f,
270
1.92M
          0.0074182263792444f,
271
1.92M
          0.2904801297290076f,
272
1.92M
          0.0952002265347505f,
273
1.92M
          0.0000000000000000,
274
1.92M
          0.3675398009862011f,
275
1.92M
          -0.4921585901373891f,
276
1.92M
          0.2462710772207514f,
277
1.92M
          -0.0794670660591026f,
278
1.92M
          0.3623817333531165f,
279
1.92M
          -0.4351904965232251f,
280
1.92M
      },
281
1.92M
      {
282
1.92M
          0.2500000000000000,
283
1.92M
          -0.1014005039375375f,
284
1.92M
          0.0000000000000000,
285
1.92M
          -0.4706702258572528f,
286
1.92M
          0.0000000000000000,
287
1.92M
          -0.0643507165794627f,
288
1.92M
          0.1107416575309343f,
289
1.92M
          0.0000000000000000,
290
1.92M
          -0.1627234014286617f,
291
1.92M
          0.0000000000000000,
292
1.92M
          0.0000000000000000,
293
1.92M
          0.0000000000000000,
294
1.92M
          0.1488339922711357f,
295
1.92M
          0.4972464710953509f,
296
1.92M
          0.2921026642334879f,
297
1.92M
          0.5550443808910661f,
298
1.92M
      },
299
1.92M
      {
300
1.92M
          0.2500000000000000,
301
1.92M
          -0.1014005039375377f,
302
1.92M
          0.1137907446044809f,
303
1.92M
          -0.1464291867126764f,
304
1.92M
          0.0000000000000000,
305
1.92M
          -0.0643507165794628f,
306
1.92M
          0.0829816309488205f,
307
1.92M
          -0.2388977352334460f,
308
1.92M
          -0.3531238544981630f,
309
1.92M
          -0.4082482904638630f,
310
1.92M
          0.4826689115059883f,
311
1.92M
          0.1741941265991622f,
312
1.92M
          -0.0476868035022925f,
313
1.92M
          0.1253805944856366f,
314
1.92M
          -0.4326608024727445f,
315
1.92M
          -0.2546827712406646f,
316
1.92M
      },
317
1.92M
      {
318
1.92M
          0.2500000000000000,
319
1.92M
          -0.1014005039375377f,
320
1.92M
          -0.4444481661973438f,
321
1.92M
          0.3085497062849487f,
322
1.92M
          0.0000000000000000,
323
1.92M
          -0.0643507165794628f,
324
1.92M
          0.1585450355183970f,
325
1.92M
          -0.5112616136592012f,
326
1.92M
          0.2579236279634129f,
327
1.92M
          0.0000000000000000,
328
1.92M
          -0.0812611176717504f,
329
1.92M
          -0.1856718091610990f,
330
1.92M
          -0.3416446842253373f,
331
1.92M
          0.3302282550303805f,
332
1.92M
          0.0702790691196282f,
333
1.92M
          -0.0741750459581023f,
334
1.92M
      },
335
1.92M
      {
336
1.92M
          0.2500000000000000,
337
1.92M
          -0.1014005039375376f,
338
1.92M
          -0.2929100136981264f,
339
1.92M
          0.0000000000000000,
340
1.92M
          0.0000000000000000,
341
1.92M
          -0.0643507165794627f,
342
1.92M
          0.3935103426921022f,
343
1.92M
          0.0657870154914254f,
344
1.92M
          0.0000000000000000,
345
1.92M
          0.4082482904638634f,
346
1.92M
          0.3078822139579031f,
347
1.92M
          0.3852501370925211f,
348
1.92M
          -0.0857401903551927f,
349
1.92M
          -0.4613374887461554f,
350
1.92M
          0.0000000000000000,
351
1.92M
          0.2191868483885728f,
352
1.92M
      },
353
1.92M
      {
354
1.92M
          0.2500000000000000,
355
1.92M
          -0.1014005039375376f,
356
1.92M
          -0.1137907446044814f,
357
1.92M
          -0.1464291867126654f,
358
1.92M
          0.0000000000000000,
359
1.92M
          -0.0643507165794627f,
360
1.92M
          0.0829816309488214f,
361
1.92M
          0.2388977352334547f,
362
1.92M
          -0.3531238544981624f,
363
1.92M
          0.4082482904638630f,
364
1.92M
          -0.4826689115059858f,
365
1.92M
          -0.1741941265991621f,
366
1.92M
          -0.0476868035022928f,
367
1.92M
          0.1253805944856431f,
368
1.92M
          -0.4326608024727457f,
369
1.92M
          -0.2546827712406641f,
370
1.92M
      },
371
1.92M
      {
372
1.92M
          0.2500000000000000,
373
1.92M
          -0.1014005039375374f,
374
1.92M
          0.0000000000000000,
375
1.92M
          0.4251149611657548f,
376
1.92M
          0.0000000000000000,
377
1.92M
          -0.0643507165794626f,
378
1.92M
          -0.4517556589999480f,
379
1.92M
          0.0000000000000000,
380
1.92M
          -0.6035859033230976f,
381
1.92M
          0.0000000000000000,
382
1.92M
          0.0000000000000000,
383
1.92M
          0.0000000000000000,
384
1.92M
          -0.1426608480880724f,
385
1.92M
          -0.1381354035075845f,
386
1.92M
          0.3487520519930227f,
387
1.92M
          0.1135498731499429f,
388
1.92M
      },
389
1.92M
  };
390
391
1.92M
  const HWY_CAPPED(float, 16) d;
392
5.77M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.84M
    auto scalar = Zero(d);
394
65.4M
    for (size_t j = 0; j < 16; j++) {
395
61.5M
      auto px = Set(d, pixels[j]);
396
61.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
61.5M
      scalar = MulAdd(px, basis, scalar);
398
61.5M
    }
399
3.84M
    Store(scalar, d, coeffs + i);
400
3.84M
  }
401
1.92M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
82.1M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
82.1M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
82.1M
      {
102
82.1M
          0.2500000000000000,
103
82.1M
          0.8769029297991420f,
104
82.1M
          0.0000000000000000,
105
82.1M
          0.0000000000000000,
106
82.1M
          0.0000000000000000,
107
82.1M
          -0.4105377591765233f,
108
82.1M
          0.0000000000000000,
109
82.1M
          0.0000000000000000,
110
82.1M
          0.0000000000000000,
111
82.1M
          0.0000000000000000,
112
82.1M
          0.0000000000000000,
113
82.1M
          0.0000000000000000,
114
82.1M
          0.0000000000000000,
115
82.1M
          0.0000000000000000,
116
82.1M
          0.0000000000000000,
117
82.1M
          0.0000000000000000,
118
82.1M
      },
119
82.1M
      {
120
82.1M
          0.2500000000000000,
121
82.1M
          0.2206518106944235f,
122
82.1M
          0.0000000000000000,
123
82.1M
          0.0000000000000000,
124
82.1M
          -0.7071067811865474f,
125
82.1M
          0.6235485373547691f,
126
82.1M
          0.0000000000000000,
127
82.1M
          0.0000000000000000,
128
82.1M
          0.0000000000000000,
129
82.1M
          0.0000000000000000,
130
82.1M
          0.0000000000000000,
131
82.1M
          0.0000000000000000,
132
82.1M
          0.0000000000000000,
133
82.1M
          0.0000000000000000,
134
82.1M
          0.0000000000000000,
135
82.1M
          0.0000000000000000,
136
82.1M
      },
137
82.1M
      {
138
82.1M
          0.2500000000000000,
139
82.1M
          -0.1014005039375376f,
140
82.1M
          0.4067007583026075f,
141
82.1M
          -0.2125574805828875f,
142
82.1M
          0.0000000000000000,
143
82.1M
          -0.0643507165794627f,
144
82.1M
          -0.4517556589999482f,
145
82.1M
          -0.3046847507248690f,
146
82.1M
          0.3017929516615495f,
147
82.1M
          0.4082482904638627f,
148
82.1M
          0.1747866975480809f,
149
82.1M
          -0.2110560104933578f,
150
82.1M
          -0.1426608480880726f,
151
82.1M
          -0.1381354035075859f,
152
82.1M
          -0.1743760259965107f,
153
82.1M
          0.1135498731499434f,
154
82.1M
      },
155
82.1M
      {
156
82.1M
          0.2500000000000000,
157
82.1M
          -0.1014005039375375f,
158
82.1M
          0.4444481661973445f,
159
82.1M
          0.3085497062849767f,
160
82.1M
          0.0000000000000000f,
161
82.1M
          -0.0643507165794627f,
162
82.1M
          0.1585450355184006f,
163
82.1M
          0.5112616136591823f,
164
82.1M
          0.2579236279634118f,
165
82.1M
          0.0000000000000000,
166
82.1M
          0.0812611176717539f,
167
82.1M
          0.1856718091610980f,
168
82.1M
          -0.3416446842253372f,
169
82.1M
          0.3302282550303788f,
170
82.1M
          0.0702790691196284f,
171
82.1M
          -0.0741750459581035f,
172
82.1M
      },
173
82.1M
      {
174
82.1M
          0.2500000000000000,
175
82.1M
          0.2206518106944236f,
176
82.1M
          0.0000000000000000,
177
82.1M
          0.0000000000000000,
178
82.1M
          0.7071067811865476f,
179
82.1M
          0.6235485373547694f,
180
82.1M
          0.0000000000000000,
181
82.1M
          0.0000000000000000,
182
82.1M
          0.0000000000000000,
183
82.1M
          0.0000000000000000,
184
82.1M
          0.0000000000000000,
185
82.1M
          0.0000000000000000,
186
82.1M
          0.0000000000000000,
187
82.1M
          0.0000000000000000,
188
82.1M
          0.0000000000000000,
189
82.1M
          0.0000000000000000,
190
82.1M
      },
191
82.1M
      {
192
82.1M
          0.2500000000000000,
193
82.1M
          -0.1014005039375378f,
194
82.1M
          0.0000000000000000,
195
82.1M
          0.4706702258572536f,
196
82.1M
          0.0000000000000000,
197
82.1M
          -0.0643507165794628f,
198
82.1M
          -0.0403851516082220f,
199
82.1M
          0.0000000000000000,
200
82.1M
          0.1627234014286620f,
201
82.1M
          0.0000000000000000,
202
82.1M
          0.0000000000000000,
203
82.1M
          0.0000000000000000,
204
82.1M
          0.7367497537172237f,
205
82.1M
          0.0875511500058708f,
206
82.1M
          -0.2921026642334881f,
207
82.1M
          0.1940289303259434f,
208
82.1M
      },
209
82.1M
      {
210
82.1M
          0.2500000000000000,
211
82.1M
          -0.1014005039375377f,
212
82.1M
          0.1957439937204294f,
213
82.1M
          -0.1621205195722993f,
214
82.1M
          0.0000000000000000,
215
82.1M
          -0.0643507165794628f,
216
82.1M
          0.0074182263792424f,
217
82.1M
          -0.2904801297289980f,
218
82.1M
          0.0952002265347504f,
219
82.1M
          0.0000000000000000,
220
82.1M
          -0.3675398009862027f,
221
82.1M
          0.4921585901373873f,
222
82.1M
          0.2462710772207515f,
223
82.1M
          -0.0794670660590957f,
224
82.1M
          0.3623817333531167f,
225
82.1M
          -0.4351904965232280f,
226
82.1M
      },
227
82.1M
      {
228
82.1M
          0.2500000000000000,
229
82.1M
          -0.1014005039375376f,
230
82.1M
          0.2929100136981264f,
231
82.1M
          0.0000000000000000,
232
82.1M
          0.0000000000000000,
233
82.1M
          -0.0643507165794627f,
234
82.1M
          0.3935103426921017f,
235
82.1M
          -0.0657870154914280f,
236
82.1M
          0.0000000000000000,
237
82.1M
          -0.4082482904638628f,
238
82.1M
          -0.3078822139579090f,
239
82.1M
          -0.3852501370925192f,
240
82.1M
          -0.0857401903551931f,
241
82.1M
          -0.4613374887461511f,
242
82.1M
          0.0000000000000000,
243
82.1M
          0.2191868483885747f,
244
82.1M
      },
245
82.1M
      {
246
82.1M
          0.2500000000000000,
247
82.1M
          -0.1014005039375376f,
248
82.1M
          -0.4067007583026072f,
249
82.1M
          -0.2125574805828705f,
250
82.1M
          0.0000000000000000,
251
82.1M
          -0.0643507165794627f,
252
82.1M
          -0.4517556589999464f,
253
82.1M
          0.3046847507248840f,
254
82.1M
          0.3017929516615503f,
255
82.1M
          -0.4082482904638635f,
256
82.1M
          -0.1747866975480813f,
257
82.1M
          0.2110560104933581f,
258
82.1M
          -0.1426608480880734f,
259
82.1M
          -0.1381354035075829f,
260
82.1M
          -0.1743760259965108f,
261
82.1M
          0.1135498731499426f,
262
82.1M
      },
263
82.1M
      {
264
82.1M
          0.2500000000000000,
265
82.1M
          -0.1014005039375377f,
266
82.1M
          -0.1957439937204287f,
267
82.1M
          -0.1621205195722833f,
268
82.1M
          0.0000000000000000,
269
82.1M
          -0.0643507165794628f,
270
82.1M
          0.0074182263792444f,
271
82.1M
          0.2904801297290076f,
272
82.1M
          0.0952002265347505f,
273
82.1M
          0.0000000000000000,
274
82.1M
          0.3675398009862011f,
275
82.1M
          -0.4921585901373891f,
276
82.1M
          0.2462710772207514f,
277
82.1M
          -0.0794670660591026f,
278
82.1M
          0.3623817333531165f,
279
82.1M
          -0.4351904965232251f,
280
82.1M
      },
281
82.1M
      {
282
82.1M
          0.2500000000000000,
283
82.1M
          -0.1014005039375375f,
284
82.1M
          0.0000000000000000,
285
82.1M
          -0.4706702258572528f,
286
82.1M
          0.0000000000000000,
287
82.1M
          -0.0643507165794627f,
288
82.1M
          0.1107416575309343f,
289
82.1M
          0.0000000000000000,
290
82.1M
          -0.1627234014286617f,
291
82.1M
          0.0000000000000000,
292
82.1M
          0.0000000000000000,
293
82.1M
          0.0000000000000000,
294
82.1M
          0.1488339922711357f,
295
82.1M
          0.4972464710953509f,
296
82.1M
          0.2921026642334879f,
297
82.1M
          0.5550443808910661f,
298
82.1M
      },
299
82.1M
      {
300
82.1M
          0.2500000000000000,
301
82.1M
          -0.1014005039375377f,
302
82.1M
          0.1137907446044809f,
303
82.1M
          -0.1464291867126764f,
304
82.1M
          0.0000000000000000,
305
82.1M
          -0.0643507165794628f,
306
82.1M
          0.0829816309488205f,
307
82.1M
          -0.2388977352334460f,
308
82.1M
          -0.3531238544981630f,
309
82.1M
          -0.4082482904638630f,
310
82.1M
          0.4826689115059883f,
311
82.1M
          0.1741941265991622f,
312
82.1M
          -0.0476868035022925f,
313
82.1M
          0.1253805944856366f,
314
82.1M
          -0.4326608024727445f,
315
82.1M
          -0.2546827712406646f,
316
82.1M
      },
317
82.1M
      {
318
82.1M
          0.2500000000000000,
319
82.1M
          -0.1014005039375377f,
320
82.1M
          -0.4444481661973438f,
321
82.1M
          0.3085497062849487f,
322
82.1M
          0.0000000000000000,
323
82.1M
          -0.0643507165794628f,
324
82.1M
          0.1585450355183970f,
325
82.1M
          -0.5112616136592012f,
326
82.1M
          0.2579236279634129f,
327
82.1M
          0.0000000000000000,
328
82.1M
          -0.0812611176717504f,
329
82.1M
          -0.1856718091610990f,
330
82.1M
          -0.3416446842253373f,
331
82.1M
          0.3302282550303805f,
332
82.1M
          0.0702790691196282f,
333
82.1M
          -0.0741750459581023f,
334
82.1M
      },
335
82.1M
      {
336
82.1M
          0.2500000000000000,
337
82.1M
          -0.1014005039375376f,
338
82.1M
          -0.2929100136981264f,
339
82.1M
          0.0000000000000000,
340
82.1M
          0.0000000000000000,
341
82.1M
          -0.0643507165794627f,
342
82.1M
          0.3935103426921022f,
343
82.1M
          0.0657870154914254f,
344
82.1M
          0.0000000000000000,
345
82.1M
          0.4082482904638634f,
346
82.1M
          0.3078822139579031f,
347
82.1M
          0.3852501370925211f,
348
82.1M
          -0.0857401903551927f,
349
82.1M
          -0.4613374887461554f,
350
82.1M
          0.0000000000000000,
351
82.1M
          0.2191868483885728f,
352
82.1M
      },
353
82.1M
      {
354
82.1M
          0.2500000000000000,
355
82.1M
          -0.1014005039375376f,
356
82.1M
          -0.1137907446044814f,
357
82.1M
          -0.1464291867126654f,
358
82.1M
          0.0000000000000000,
359
82.1M
          -0.0643507165794627f,
360
82.1M
          0.0829816309488214f,
361
82.1M
          0.2388977352334547f,
362
82.1M
          -0.3531238544981624f,
363
82.1M
          0.4082482904638630f,
364
82.1M
          -0.4826689115059858f,
365
82.1M
          -0.1741941265991621f,
366
82.1M
          -0.0476868035022928f,
367
82.1M
          0.1253805944856431f,
368
82.1M
          -0.4326608024727457f,
369
82.1M
          -0.2546827712406641f,
370
82.1M
      },
371
82.1M
      {
372
82.1M
          0.2500000000000000,
373
82.1M
          -0.1014005039375374f,
374
82.1M
          0.0000000000000000,
375
82.1M
          0.4251149611657548f,
376
82.1M
          0.0000000000000000,
377
82.1M
          -0.0643507165794626f,
378
82.1M
          -0.4517556589999480f,
379
82.1M
          0.0000000000000000,
380
82.1M
          -0.6035859033230976f,
381
82.1M
          0.0000000000000000,
382
82.1M
          0.0000000000000000,
383
82.1M
          0.0000000000000000,
384
82.1M
          -0.1426608480880724f,
385
82.1M
          -0.1381354035075845f,
386
82.1M
          0.3487520519930227f,
387
82.1M
          0.1135498731499429f,
388
82.1M
      },
389
82.1M
  };
390
391
82.1M
  const HWY_CAPPED(float, 16) d;
392
246M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
164M
    auto scalar = Zero(d);
394
2.79G
    for (size_t j = 0; j < 16; j++) {
395
2.62G
      auto px = Set(d, pixels[j]);
396
2.62G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.62G
      scalar = MulAdd(px, basis, scalar);
398
2.62G
    }
399
164M
    Store(scalar, d, coeffs + i);
400
164M
  }
401
82.1M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.92M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.92M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.92M
      {
102
1.92M
          0.2500000000000000,
103
1.92M
          0.8769029297991420f,
104
1.92M
          0.0000000000000000,
105
1.92M
          0.0000000000000000,
106
1.92M
          0.0000000000000000,
107
1.92M
          -0.4105377591765233f,
108
1.92M
          0.0000000000000000,
109
1.92M
          0.0000000000000000,
110
1.92M
          0.0000000000000000,
111
1.92M
          0.0000000000000000,
112
1.92M
          0.0000000000000000,
113
1.92M
          0.0000000000000000,
114
1.92M
          0.0000000000000000,
115
1.92M
          0.0000000000000000,
116
1.92M
          0.0000000000000000,
117
1.92M
          0.0000000000000000,
118
1.92M
      },
119
1.92M
      {
120
1.92M
          0.2500000000000000,
121
1.92M
          0.2206518106944235f,
122
1.92M
          0.0000000000000000,
123
1.92M
          0.0000000000000000,
124
1.92M
          -0.7071067811865474f,
125
1.92M
          0.6235485373547691f,
126
1.92M
          0.0000000000000000,
127
1.92M
          0.0000000000000000,
128
1.92M
          0.0000000000000000,
129
1.92M
          0.0000000000000000,
130
1.92M
          0.0000000000000000,
131
1.92M
          0.0000000000000000,
132
1.92M
          0.0000000000000000,
133
1.92M
          0.0000000000000000,
134
1.92M
          0.0000000000000000,
135
1.92M
          0.0000000000000000,
136
1.92M
      },
137
1.92M
      {
138
1.92M
          0.2500000000000000,
139
1.92M
          -0.1014005039375376f,
140
1.92M
          0.4067007583026075f,
141
1.92M
          -0.2125574805828875f,
142
1.92M
          0.0000000000000000,
143
1.92M
          -0.0643507165794627f,
144
1.92M
          -0.4517556589999482f,
145
1.92M
          -0.3046847507248690f,
146
1.92M
          0.3017929516615495f,
147
1.92M
          0.4082482904638627f,
148
1.92M
          0.1747866975480809f,
149
1.92M
          -0.2110560104933578f,
150
1.92M
          -0.1426608480880726f,
151
1.92M
          -0.1381354035075859f,
152
1.92M
          -0.1743760259965107f,
153
1.92M
          0.1135498731499434f,
154
1.92M
      },
155
1.92M
      {
156
1.92M
          0.2500000000000000,
157
1.92M
          -0.1014005039375375f,
158
1.92M
          0.4444481661973445f,
159
1.92M
          0.3085497062849767f,
160
1.92M
          0.0000000000000000f,
161
1.92M
          -0.0643507165794627f,
162
1.92M
          0.1585450355184006f,
163
1.92M
          0.5112616136591823f,
164
1.92M
          0.2579236279634118f,
165
1.92M
          0.0000000000000000,
166
1.92M
          0.0812611176717539f,
167
1.92M
          0.1856718091610980f,
168
1.92M
          -0.3416446842253372f,
169
1.92M
          0.3302282550303788f,
170
1.92M
          0.0702790691196284f,
171
1.92M
          -0.0741750459581035f,
172
1.92M
      },
173
1.92M
      {
174
1.92M
          0.2500000000000000,
175
1.92M
          0.2206518106944236f,
176
1.92M
          0.0000000000000000,
177
1.92M
          0.0000000000000000,
178
1.92M
          0.7071067811865476f,
179
1.92M
          0.6235485373547694f,
180
1.92M
          0.0000000000000000,
181
1.92M
          0.0000000000000000,
182
1.92M
          0.0000000000000000,
183
1.92M
          0.0000000000000000,
184
1.92M
          0.0000000000000000,
185
1.92M
          0.0000000000000000,
186
1.92M
          0.0000000000000000,
187
1.92M
          0.0000000000000000,
188
1.92M
          0.0000000000000000,
189
1.92M
          0.0000000000000000,
190
1.92M
      },
191
1.92M
      {
192
1.92M
          0.2500000000000000,
193
1.92M
          -0.1014005039375378f,
194
1.92M
          0.0000000000000000,
195
1.92M
          0.4706702258572536f,
196
1.92M
          0.0000000000000000,
197
1.92M
          -0.0643507165794628f,
198
1.92M
          -0.0403851516082220f,
199
1.92M
          0.0000000000000000,
200
1.92M
          0.1627234014286620f,
201
1.92M
          0.0000000000000000,
202
1.92M
          0.0000000000000000,
203
1.92M
          0.0000000000000000,
204
1.92M
          0.7367497537172237f,
205
1.92M
          0.0875511500058708f,
206
1.92M
          -0.2921026642334881f,
207
1.92M
          0.1940289303259434f,
208
1.92M
      },
209
1.92M
      {
210
1.92M
          0.2500000000000000,
211
1.92M
          -0.1014005039375377f,
212
1.92M
          0.1957439937204294f,
213
1.92M
          -0.1621205195722993f,
214
1.92M
          0.0000000000000000,
215
1.92M
          -0.0643507165794628f,
216
1.92M
          0.0074182263792424f,
217
1.92M
          -0.2904801297289980f,
218
1.92M
          0.0952002265347504f,
219
1.92M
          0.0000000000000000,
220
1.92M
          -0.3675398009862027f,
221
1.92M
          0.4921585901373873f,
222
1.92M
          0.2462710772207515f,
223
1.92M
          -0.0794670660590957f,
224
1.92M
          0.3623817333531167f,
225
1.92M
          -0.4351904965232280f,
226
1.92M
      },
227
1.92M
      {
228
1.92M
          0.2500000000000000,
229
1.92M
          -0.1014005039375376f,
230
1.92M
          0.2929100136981264f,
231
1.92M
          0.0000000000000000,
232
1.92M
          0.0000000000000000,
233
1.92M
          -0.0643507165794627f,
234
1.92M
          0.3935103426921017f,
235
1.92M
          -0.0657870154914280f,
236
1.92M
          0.0000000000000000,
237
1.92M
          -0.4082482904638628f,
238
1.92M
          -0.3078822139579090f,
239
1.92M
          -0.3852501370925192f,
240
1.92M
          -0.0857401903551931f,
241
1.92M
          -0.4613374887461511f,
242
1.92M
          0.0000000000000000,
243
1.92M
          0.2191868483885747f,
244
1.92M
      },
245
1.92M
      {
246
1.92M
          0.2500000000000000,
247
1.92M
          -0.1014005039375376f,
248
1.92M
          -0.4067007583026072f,
249
1.92M
          -0.2125574805828705f,
250
1.92M
          0.0000000000000000,
251
1.92M
          -0.0643507165794627f,
252
1.92M
          -0.4517556589999464f,
253
1.92M
          0.3046847507248840f,
254
1.92M
          0.3017929516615503f,
255
1.92M
          -0.4082482904638635f,
256
1.92M
          -0.1747866975480813f,
257
1.92M
          0.2110560104933581f,
258
1.92M
          -0.1426608480880734f,
259
1.92M
          -0.1381354035075829f,
260
1.92M
          -0.1743760259965108f,
261
1.92M
          0.1135498731499426f,
262
1.92M
      },
263
1.92M
      {
264
1.92M
          0.2500000000000000,
265
1.92M
          -0.1014005039375377f,
266
1.92M
          -0.1957439937204287f,
267
1.92M
          -0.1621205195722833f,
268
1.92M
          0.0000000000000000,
269
1.92M
          -0.0643507165794628f,
270
1.92M
          0.0074182263792444f,
271
1.92M
          0.2904801297290076f,
272
1.92M
          0.0952002265347505f,
273
1.92M
          0.0000000000000000,
274
1.92M
          0.3675398009862011f,
275
1.92M
          -0.4921585901373891f,
276
1.92M
          0.2462710772207514f,
277
1.92M
          -0.0794670660591026f,
278
1.92M
          0.3623817333531165f,
279
1.92M
          -0.4351904965232251f,
280
1.92M
      },
281
1.92M
      {
282
1.92M
          0.2500000000000000,
283
1.92M
          -0.1014005039375375f,
284
1.92M
          0.0000000000000000,
285
1.92M
          -0.4706702258572528f,
286
1.92M
          0.0000000000000000,
287
1.92M
          -0.0643507165794627f,
288
1.92M
          0.1107416575309343f,
289
1.92M
          0.0000000000000000,
290
1.92M
          -0.1627234014286617f,
291
1.92M
          0.0000000000000000,
292
1.92M
          0.0000000000000000,
293
1.92M
          0.0000000000000000,
294
1.92M
          0.1488339922711357f,
295
1.92M
          0.4972464710953509f,
296
1.92M
          0.2921026642334879f,
297
1.92M
          0.5550443808910661f,
298
1.92M
      },
299
1.92M
      {
300
1.92M
          0.2500000000000000,
301
1.92M
          -0.1014005039375377f,
302
1.92M
          0.1137907446044809f,
303
1.92M
          -0.1464291867126764f,
304
1.92M
          0.0000000000000000,
305
1.92M
          -0.0643507165794628f,
306
1.92M
          0.0829816309488205f,
307
1.92M
          -0.2388977352334460f,
308
1.92M
          -0.3531238544981630f,
309
1.92M
          -0.4082482904638630f,
310
1.92M
          0.4826689115059883f,
311
1.92M
          0.1741941265991622f,
312
1.92M
          -0.0476868035022925f,
313
1.92M
          0.1253805944856366f,
314
1.92M
          -0.4326608024727445f,
315
1.92M
          -0.2546827712406646f,
316
1.92M
      },
317
1.92M
      {
318
1.92M
          0.2500000000000000,
319
1.92M
          -0.1014005039375377f,
320
1.92M
          -0.4444481661973438f,
321
1.92M
          0.3085497062849487f,
322
1.92M
          0.0000000000000000,
323
1.92M
          -0.0643507165794628f,
324
1.92M
          0.1585450355183970f,
325
1.92M
          -0.5112616136592012f,
326
1.92M
          0.2579236279634129f,
327
1.92M
          0.0000000000000000,
328
1.92M
          -0.0812611176717504f,
329
1.92M
          -0.1856718091610990f,
330
1.92M
          -0.3416446842253373f,
331
1.92M
          0.3302282550303805f,
332
1.92M
          0.0702790691196282f,
333
1.92M
          -0.0741750459581023f,
334
1.92M
      },
335
1.92M
      {
336
1.92M
          0.2500000000000000,
337
1.92M
          -0.1014005039375376f,
338
1.92M
          -0.2929100136981264f,
339
1.92M
          0.0000000000000000,
340
1.92M
          0.0000000000000000,
341
1.92M
          -0.0643507165794627f,
342
1.92M
          0.3935103426921022f,
343
1.92M
          0.0657870154914254f,
344
1.92M
          0.0000000000000000,
345
1.92M
          0.4082482904638634f,
346
1.92M
          0.3078822139579031f,
347
1.92M
          0.3852501370925211f,
348
1.92M
          -0.0857401903551927f,
349
1.92M
          -0.4613374887461554f,
350
1.92M
          0.0000000000000000,
351
1.92M
          0.2191868483885728f,
352
1.92M
      },
353
1.92M
      {
354
1.92M
          0.2500000000000000,
355
1.92M
          -0.1014005039375376f,
356
1.92M
          -0.1137907446044814f,
357
1.92M
          -0.1464291867126654f,
358
1.92M
          0.0000000000000000,
359
1.92M
          -0.0643507165794627f,
360
1.92M
          0.0829816309488214f,
361
1.92M
          0.2388977352334547f,
362
1.92M
          -0.3531238544981624f,
363
1.92M
          0.4082482904638630f,
364
1.92M
          -0.4826689115059858f,
365
1.92M
          -0.1741941265991621f,
366
1.92M
          -0.0476868035022928f,
367
1.92M
          0.1253805944856431f,
368
1.92M
          -0.4326608024727457f,
369
1.92M
          -0.2546827712406641f,
370
1.92M
      },
371
1.92M
      {
372
1.92M
          0.2500000000000000,
373
1.92M
          -0.1014005039375374f,
374
1.92M
          0.0000000000000000,
375
1.92M
          0.4251149611657548f,
376
1.92M
          0.0000000000000000,
377
1.92M
          -0.0643507165794626f,
378
1.92M
          -0.4517556589999480f,
379
1.92M
          0.0000000000000000,
380
1.92M
          -0.6035859033230976f,
381
1.92M
          0.0000000000000000,
382
1.92M
          0.0000000000000000,
383
1.92M
          0.0000000000000000,
384
1.92M
          -0.1426608480880724f,
385
1.92M
          -0.1381354035075845f,
386
1.92M
          0.3487520519930227f,
387
1.92M
          0.1135498731499429f,
388
1.92M
      },
389
1.92M
  };
390
391
1.92M
  const HWY_CAPPED(float, 16) d;
392
5.77M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.84M
    auto scalar = Zero(d);
394
65.4M
    for (size_t j = 0; j < 16; j++) {
395
61.5M
      auto px = Set(d, pixels[j]);
396
61.5M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
61.5M
      scalar = MulAdd(px, basis, scalar);
398
61.5M
    }
399
3.84M
    Store(scalar, d, coeffs + i);
400
3.84M
  }
401
1.92M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout:
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
85.9M
                            float* JXL_RESTRICT coefficients) {
411
85.9M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
85.9M
  size_t afv_x = afv_kind & 1;
413
85.9M
  size_t afv_y = afv_kind / 2;
414
85.9M
  HWY_ALIGN float block[4 * 8] = {};
415
429M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.71G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.37G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
1.37G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
1.37G
    }
420
343M
  }
421
  // AFV coefficients in (even, even) positions.
422
85.9M
  HWY_ALIGN float coeff[4 * 4];
423
85.9M
  AFVDCT4x4(block, coeff);
424
429M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.71G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.37G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
1.37G
    }
428
343M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
85.9M
  ComputeScaledDCT<4, 4>()(
431
85.9M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
85.9M
              pixels_stride),
433
85.9M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
429M
  for (size_t iy = 0; iy < 4; iy++) {
436
3.09G
    for (size_t ix = 0; ix < 8; ix++) {
437
2.75G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
2.75G
    }
439
343M
  }
440
  // 4x8 DCT of the other half of the block.
441
85.9M
  ComputeScaledDCT<4, 8>()(
442
85.9M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
85.9M
      block, scratch_space);
444
429M
  for (size_t iy = 0; iy < 4; iy++) {
445
3.09G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.75G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
2.75G
    }
448
343M
  }
449
85.9M
  float block00 = coefficients[0] * 0.25f;
450
85.9M
  float block01 = coefficients[1];
451
85.9M
  float block10 = coefficients[8];
452
85.9M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
85.9M
  coefficients[1] = (block00 - block01) * 0.5f;
454
85.9M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
85.9M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
346k
                            float* JXL_RESTRICT coefficients) {
411
346k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
346k
  size_t afv_x = afv_kind & 1;
413
346k
  size_t afv_y = afv_kind / 2;
414
346k
  HWY_ALIGN float block[4 * 8] = {};
415
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.54M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.54M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.54M
    }
420
1.38M
  }
421
  // AFV coefficients in (even, even) positions.
422
346k
  HWY_ALIGN float coeff[4 * 4];
423
346k
  AFVDCT4x4(block, coeff);
424
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.54M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.54M
    }
428
1.38M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
346k
  ComputeScaledDCT<4, 4>()(
431
346k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
346k
              pixels_stride),
433
346k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
436
12.4M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.0M
    }
439
1.38M
  }
440
  // 4x8 DCT of the other half of the block.
441
346k
  ComputeScaledDCT<4, 8>()(
442
346k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
346k
      block, scratch_space);
444
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
445
12.4M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.0M
    }
448
1.38M
  }
449
346k
  float block00 = coefficients[0] * 0.25f;
450
346k
  float block01 = coefficients[1];
451
346k
  float block10 = coefficients[8];
452
346k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
346k
  coefficients[1] = (block00 - block01) * 0.5f;
454
346k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
346k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
994k
                            float* JXL_RESTRICT coefficients) {
411
994k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
994k
  size_t afv_x = afv_kind & 1;
413
994k
  size_t afv_y = afv_kind / 2;
414
994k
  HWY_ALIGN float block[4 * 8] = {};
415
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
19.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
15.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
15.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
15.9M
    }
420
3.97M
  }
421
  // AFV coefficients in (even, even) positions.
422
994k
  HWY_ALIGN float coeff[4 * 4];
423
994k
  AFVDCT4x4(block, coeff);
424
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
19.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
15.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
15.9M
    }
428
3.97M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
994k
  ComputeScaledDCT<4, 4>()(
431
994k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
994k
              pixels_stride),
433
994k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
35.7M
    for (size_t ix = 0; ix < 8; ix++) {
437
31.8M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
31.8M
    }
439
3.97M
  }
440
  // 4x8 DCT of the other half of the block.
441
994k
  ComputeScaledDCT<4, 8>()(
442
994k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
994k
      block, scratch_space);
444
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
35.7M
    for (size_t ix = 0; ix < 8; ix++) {
446
31.8M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
31.8M
    }
448
3.97M
  }
449
994k
  float block00 = coefficients[0] * 0.25f;
450
994k
  float block01 = coefficients[1];
451
994k
  float block10 = coefficients[8];
452
994k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
994k
  coefficients[1] = (block00 - block01) * 0.5f;
454
994k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
994k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
216k
                            float* JXL_RESTRICT coefficients) {
411
216k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
216k
  size_t afv_x = afv_kind & 1;
413
216k
  size_t afv_y = afv_kind / 2;
414
216k
  HWY_ALIGN float block[4 * 8] = {};
415
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.33M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.46M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.46M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.46M
    }
420
866k
  }
421
  // AFV coefficients in (even, even) positions.
422
216k
  HWY_ALIGN float coeff[4 * 4];
423
216k
  AFVDCT4x4(block, coeff);
424
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.33M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.46M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.46M
    }
428
866k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
216k
  ComputeScaledDCT<4, 4>()(
431
216k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
216k
              pixels_stride),
433
216k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.80M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.93M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.93M
    }
439
866k
  }
440
  // 4x8 DCT of the other half of the block.
441
216k
  ComputeScaledDCT<4, 8>()(
442
216k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
216k
      block, scratch_space);
444
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.80M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.93M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.93M
    }
448
866k
  }
449
216k
  float block00 = coefficients[0] * 0.25f;
450
216k
  float block01 = coefficients[1];
451
216k
  float block10 = coefficients[8];
452
216k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
216k
  coefficients[1] = (block00 - block01) * 0.5f;
454
216k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
216k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
366k
                            float* JXL_RESTRICT coefficients) {
411
366k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
366k
  size_t afv_x = afv_kind & 1;
413
366k
  size_t afv_y = afv_kind / 2;
414
366k
  HWY_ALIGN float block[4 * 8] = {};
415
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.33M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.87M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.87M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.87M
    }
420
1.46M
  }
421
  // AFV coefficients in (even, even) positions.
422
366k
  HWY_ALIGN float coeff[4 * 4];
423
366k
  AFVDCT4x4(block, coeff);
424
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.33M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.87M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.87M
    }
428
1.46M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
366k
  ComputeScaledDCT<4, 4>()(
431
366k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
366k
              pixels_stride),
433
366k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.7M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.7M
    }
439
1.46M
  }
440
  // 4x8 DCT of the other half of the block.
441
366k
  ComputeScaledDCT<4, 8>()(
442
366k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
366k
      block, scratch_space);
444
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.7M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.7M
    }
448
1.46M
  }
449
366k
  float block00 = coefficients[0] * 0.25f;
450
366k
  float block01 = coefficients[1];
451
366k
  float block10 = coefficients[8];
452
366k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
366k
  coefficients[1] = (block00 - block01) * 0.5f;
454
366k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
366k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.5M
                            float* JXL_RESTRICT coefficients) {
411
20.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.5M
  size_t afv_x = afv_kind & 1;
413
20.5M
  size_t afv_y = afv_kind / 2;
414
20.5M
  HWY_ALIGN float block[4 * 8] = {};
415
102M
  for (size_t iy = 0; iy < 4; iy++) {
416
410M
    for (size_t ix = 0; ix < 4; ix++) {
417
328M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
328M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
328M
    }
420
82.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
20.5M
  HWY_ALIGN float coeff[4 * 4];
423
20.5M
  AFVDCT4x4(block, coeff);
424
102M
  for (size_t iy = 0; iy < 4; iy++) {
425
410M
    for (size_t ix = 0; ix < 4; ix++) {
426
328M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
328M
    }
428
82.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.5M
  ComputeScaledDCT<4, 4>()(
431
20.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.5M
              pixels_stride),
433
20.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
102M
  for (size_t iy = 0; iy < 4; iy++) {
436
739M
    for (size_t ix = 0; ix < 8; ix++) {
437
657M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
657M
    }
439
82.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
20.5M
  ComputeScaledDCT<4, 8>()(
442
20.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.5M
      block, scratch_space);
444
102M
  for (size_t iy = 0; iy < 4; iy++) {
445
739M
    for (size_t ix = 0; ix < 8; ix++) {
446
657M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
657M
    }
448
82.1M
  }
449
20.5M
  float block00 = coefficients[0] * 0.25f;
450
20.5M
  float block01 = coefficients[1];
451
20.5M
  float block10 = coefficients[8];
452
20.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.5M
                            float* JXL_RESTRICT coefficients) {
411
20.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.5M
  size_t afv_x = afv_kind & 1;
413
20.5M
  size_t afv_y = afv_kind / 2;
414
20.5M
  HWY_ALIGN float block[4 * 8] = {};
415
102M
  for (size_t iy = 0; iy < 4; iy++) {
416
410M
    for (size_t ix = 0; ix < 4; ix++) {
417
328M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
328M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
328M
    }
420
82.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
20.5M
  HWY_ALIGN float coeff[4 * 4];
423
20.5M
  AFVDCT4x4(block, coeff);
424
102M
  for (size_t iy = 0; iy < 4; iy++) {
425
410M
    for (size_t ix = 0; ix < 4; ix++) {
426
328M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
328M
    }
428
82.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.5M
  ComputeScaledDCT<4, 4>()(
431
20.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.5M
              pixels_stride),
433
20.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
102M
  for (size_t iy = 0; iy < 4; iy++) {
436
739M
    for (size_t ix = 0; ix < 8; ix++) {
437
657M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
657M
    }
439
82.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
20.5M
  ComputeScaledDCT<4, 8>()(
442
20.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.5M
      block, scratch_space);
444
102M
  for (size_t iy = 0; iy < 4; iy++) {
445
739M
    for (size_t ix = 0; ix < 8; ix++) {
446
657M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
657M
    }
448
82.1M
  }
449
20.5M
  float block00 = coefficients[0] * 0.25f;
450
20.5M
  float block01 = coefficients[1];
451
20.5M
  float block10 = coefficients[8];
452
20.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.5M
                            float* JXL_RESTRICT coefficients) {
411
20.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.5M
  size_t afv_x = afv_kind & 1;
413
20.5M
  size_t afv_y = afv_kind / 2;
414
20.5M
  HWY_ALIGN float block[4 * 8] = {};
415
102M
  for (size_t iy = 0; iy < 4; iy++) {
416
410M
    for (size_t ix = 0; ix < 4; ix++) {
417
328M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
328M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
328M
    }
420
82.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
20.5M
  HWY_ALIGN float coeff[4 * 4];
423
20.5M
  AFVDCT4x4(block, coeff);
424
102M
  for (size_t iy = 0; iy < 4; iy++) {
425
410M
    for (size_t ix = 0; ix < 4; ix++) {
426
328M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
328M
    }
428
82.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.5M
  ComputeScaledDCT<4, 4>()(
431
20.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.5M
              pixels_stride),
433
20.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
102M
  for (size_t iy = 0; iy < 4; iy++) {
436
739M
    for (size_t ix = 0; ix < 8; ix++) {
437
657M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
657M
    }
439
82.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
20.5M
  ComputeScaledDCT<4, 8>()(
442
20.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.5M
      block, scratch_space);
444
102M
  for (size_t iy = 0; iy < 4; iy++) {
445
739M
    for (size_t ix = 0; ix < 8; ix++) {
446
657M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
657M
    }
448
82.1M
  }
449
20.5M
  float block00 = coefficients[0] * 0.25f;
450
20.5M
  float block01 = coefficients[1];
451
20.5M
  float block10 = coefficients[8];
452
20.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
20.5M
                            float* JXL_RESTRICT coefficients) {
411
20.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
20.5M
  size_t afv_x = afv_kind & 1;
413
20.5M
  size_t afv_y = afv_kind / 2;
414
20.5M
  HWY_ALIGN float block[4 * 8] = {};
415
102M
  for (size_t iy = 0; iy < 4; iy++) {
416
410M
    for (size_t ix = 0; ix < 4; ix++) {
417
328M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
328M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
328M
    }
420
82.1M
  }
421
  // AFV coefficients in (even, even) positions.
422
20.5M
  HWY_ALIGN float coeff[4 * 4];
423
20.5M
  AFVDCT4x4(block, coeff);
424
102M
  for (size_t iy = 0; iy < 4; iy++) {
425
410M
    for (size_t ix = 0; ix < 4; ix++) {
426
328M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
328M
    }
428
82.1M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
20.5M
  ComputeScaledDCT<4, 4>()(
431
20.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
20.5M
              pixels_stride),
433
20.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
102M
  for (size_t iy = 0; iy < 4; iy++) {
436
739M
    for (size_t ix = 0; ix < 8; ix++) {
437
657M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
657M
    }
439
82.1M
  }
440
  // 4x8 DCT of the other half of the block.
441
20.5M
  ComputeScaledDCT<4, 8>()(
442
20.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
20.5M
      block, scratch_space);
444
102M
  for (size_t iy = 0; iy < 4; iy++) {
445
739M
    for (size_t ix = 0; ix < 8; ix++) {
446
657M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
657M
    }
448
82.1M
  }
449
20.5M
  float block00 = coefficients[0] * 0.25f;
450
20.5M
  float block01 = coefficients[1];
451
20.5M
  float block10 = coefficients[8];
452
20.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
20.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
20.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
20.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
346k
                            float* JXL_RESTRICT coefficients) {
411
346k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
346k
  size_t afv_x = afv_kind & 1;
413
346k
  size_t afv_y = afv_kind / 2;
414
346k
  HWY_ALIGN float block[4 * 8] = {};
415
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
416
6.92M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.54M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.54M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.54M
    }
420
1.38M
  }
421
  // AFV coefficients in (even, even) positions.
422
346k
  HWY_ALIGN float coeff[4 * 4];
423
346k
  AFVDCT4x4(block, coeff);
424
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
425
6.92M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.54M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.54M
    }
428
1.38M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
346k
  ComputeScaledDCT<4, 4>()(
431
346k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
346k
              pixels_stride),
433
346k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
436
12.4M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.0M
    }
439
1.38M
  }
440
  // 4x8 DCT of the other half of the block.
441
346k
  ComputeScaledDCT<4, 8>()(
442
346k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
346k
      block, scratch_space);
444
1.73M
  for (size_t iy = 0; iy < 4; iy++) {
445
12.4M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.0M
    }
448
1.38M
  }
449
346k
  float block00 = coefficients[0] * 0.25f;
450
346k
  float block01 = coefficients[1];
451
346k
  float block10 = coefficients[8];
452
346k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
346k
  coefficients[1] = (block00 - block01) * 0.5f;
454
346k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
346k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
994k
                            float* JXL_RESTRICT coefficients) {
411
994k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
994k
  size_t afv_x = afv_kind & 1;
413
994k
  size_t afv_y = afv_kind / 2;
414
994k
  HWY_ALIGN float block[4 * 8] = {};
415
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
416
19.8M
    for (size_t ix = 0; ix < 4; ix++) {
417
15.9M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
15.9M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
15.9M
    }
420
3.97M
  }
421
  // AFV coefficients in (even, even) positions.
422
994k
  HWY_ALIGN float coeff[4 * 4];
423
994k
  AFVDCT4x4(block, coeff);
424
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
425
19.8M
    for (size_t ix = 0; ix < 4; ix++) {
426
15.9M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
15.9M
    }
428
3.97M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
994k
  ComputeScaledDCT<4, 4>()(
431
994k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
994k
              pixels_stride),
433
994k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
436
35.7M
    for (size_t ix = 0; ix < 8; ix++) {
437
31.8M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
31.8M
    }
439
3.97M
  }
440
  // 4x8 DCT of the other half of the block.
441
994k
  ComputeScaledDCT<4, 8>()(
442
994k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
994k
      block, scratch_space);
444
4.97M
  for (size_t iy = 0; iy < 4; iy++) {
445
35.7M
    for (size_t ix = 0; ix < 8; ix++) {
446
31.8M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
31.8M
    }
448
3.97M
  }
449
994k
  float block00 = coefficients[0] * 0.25f;
450
994k
  float block01 = coefficients[1];
451
994k
  float block10 = coefficients[8];
452
994k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
994k
  coefficients[1] = (block00 - block01) * 0.5f;
454
994k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
994k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
216k
                            float* JXL_RESTRICT coefficients) {
411
216k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
216k
  size_t afv_x = afv_kind & 1;
413
216k
  size_t afv_y = afv_kind / 2;
414
216k
  HWY_ALIGN float block[4 * 8] = {};
415
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.33M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.46M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.46M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.46M
    }
420
866k
  }
421
  // AFV coefficients in (even, even) positions.
422
216k
  HWY_ALIGN float coeff[4 * 4];
423
216k
  AFVDCT4x4(block, coeff);
424
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.33M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.46M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.46M
    }
428
866k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
216k
  ComputeScaledDCT<4, 4>()(
431
216k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
216k
              pixels_stride),
433
216k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
436
7.80M
    for (size_t ix = 0; ix < 8; ix++) {
437
6.93M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
6.93M
    }
439
866k
  }
440
  // 4x8 DCT of the other half of the block.
441
216k
  ComputeScaledDCT<4, 8>()(
442
216k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
216k
      block, scratch_space);
444
1.08M
  for (size_t iy = 0; iy < 4; iy++) {
445
7.80M
    for (size_t ix = 0; ix < 8; ix++) {
446
6.93M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
6.93M
    }
448
866k
  }
449
216k
  float block00 = coefficients[0] * 0.25f;
450
216k
  float block01 = coefficients[1];
451
216k
  float block10 = coefficients[8];
452
216k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
216k
  coefficients[1] = (block00 - block01) * 0.5f;
454
216k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
216k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
366k
                            float* JXL_RESTRICT coefficients) {
411
366k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
366k
  size_t afv_x = afv_kind & 1;
413
366k
  size_t afv_y = afv_kind / 2;
414
366k
  HWY_ALIGN float block[4 * 8] = {};
415
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.33M
    for (size_t ix = 0; ix < 4; ix++) {
417
5.87M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
5.87M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
5.87M
    }
420
1.46M
  }
421
  // AFV coefficients in (even, even) positions.
422
366k
  HWY_ALIGN float coeff[4 * 4];
423
366k
  AFVDCT4x4(block, coeff);
424
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.33M
    for (size_t ix = 0; ix < 4; ix++) {
426
5.87M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
5.87M
    }
428
1.46M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
366k
  ComputeScaledDCT<4, 4>()(
431
366k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
366k
              pixels_stride),
433
366k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.2M
    for (size_t ix = 0; ix < 8; ix++) {
437
11.7M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
11.7M
    }
439
1.46M
  }
440
  // 4x8 DCT of the other half of the block.
441
366k
  ComputeScaledDCT<4, 8>()(
442
366k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
366k
      block, scratch_space);
444
1.83M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.2M
    for (size_t ix = 0; ix < 8; ix++) {
446
11.7M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
11.7M
    }
448
1.46M
  }
449
366k
  float block00 = coefficients[0] * 0.25f;
450
366k
  float block01 = coefficients[1];
451
366k
  float block10 = coefficients[8];
452
366k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
366k
  coefficients[1] = (block00 - block01) * 0.5f;
454
366k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
366k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
296M
                                          float* JXL_RESTRICT scratch_space) {
462
296M
  using Type = AcStrategyType;
463
296M
  switch (strategy) {
464
22.7M
    case Type::IDENTITY: {
465
68.1M
      for (size_t y = 0; y < 2; y++) {
466
136M
        for (size_t x = 0; x < 2; x++) {
467
90.9M
          float block_dc = 0;
468
454M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.81G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.45G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.45G
            }
472
363M
          }
473
90.9M
          block_dc *= 1.0f / 16;
474
454M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.81G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.45G
              if (ix == 1 && iy == 1) continue;
477
1.36G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.36G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.36G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.36G
            }
481
363M
          }
482
90.9M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
90.9M
          coefficients[y * 8 + x] = block_dc;
484
90.9M
        }
485
45.4M
      }
486
22.7M
      float block00 = coefficients[0];
487
22.7M
      float block01 = coefficients[1];
488
22.7M
      float block10 = coefficients[8];
489
22.7M
      float block11 = coefficients[9];
490
22.7M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
22.7M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
22.7M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
22.7M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
22.7M
      break;
495
0
    }
496
21.4M
    case Type::DCT8X4: {
497
64.2M
      for (size_t x = 0; x < 2; x++) {
498
42.8M
        HWY_ALIGN float block[4 * 8];
499
42.8M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
42.8M
                                 scratch_space);
501
214M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.54G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.37G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.37G
          }
506
171M
        }
507
42.8M
      }
508
21.4M
      float block0 = coefficients[0];
509
21.4M
      float block1 = coefficients[8];
510
21.4M
      coefficients[0] = (block0 + block1) * 0.5f;
511
21.4M
      coefficients[8] = (block0 - block1) * 0.5f;
512
21.4M
      break;
513
0
    }
514
21.2M
    case Type::DCT4X8: {
515
63.6M
      for (size_t y = 0; y < 2; y++) {
516
42.4M
        HWY_ALIGN float block[4 * 8];
517
42.4M
        ComputeScaledDCT<4, 8>()(
518
42.4M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
42.4M
            scratch_space);
520
212M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.52G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.35G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.35G
          }
524
169M
        }
525
42.4M
      }
526
21.2M
      float block0 = coefficients[0];
527
21.2M
      float block1 = coefficients[8];
528
21.2M
      coefficients[0] = (block0 + block1) * 0.5f;
529
21.2M
      coefficients[8] = (block0 - block1) * 0.5f;
530
21.2M
      break;
531
0
    }
532
20.5M
    case Type::DCT4X4: {
533
61.6M
      for (size_t y = 0; y < 2; y++) {
534
123M
        for (size_t x = 0; x < 2; x++) {
535
82.1M
          HWY_ALIGN float block[4 * 4];
536
82.1M
          ComputeScaledDCT<4, 4>()(
537
82.1M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
82.1M
              block, scratch_space);
539
410M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.64G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.31G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.31G
            }
543
328M
          }
544
82.1M
        }
545
41.0M
      }
546
20.5M
      float block00 = coefficients[0];
547
20.5M
      float block01 = coefficients[1];
548
20.5M
      float block10 = coefficients[8];
549
20.5M
      float block11 = coefficients[9];
550
20.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
20.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
20.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
20.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
20.5M
      break;
555
0
    }
556
22.8M
    case Type::DCT2X2: {
557
22.8M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
22.8M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
22.8M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
22.8M
      break;
561
0
    }
562
8.71M
    case Type::DCT16X16: {
563
8.71M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
8.71M
                                 scratch_space);
565
8.71M
      break;
566
0
    }
567
17.0M
    case Type::DCT16X8: {
568
17.0M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
17.0M
                                scratch_space);
570
17.0M
      break;
571
0
    }
572
17.4M
    case Type::DCT8X16: {
573
17.4M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
17.4M
                                scratch_space);
575
17.4M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.37M
    case Type::DCT32X16: {
588
3.37M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.37M
                                 scratch_space);
590
3.37M
      break;
591
0
    }
592
3.52M
    case Type::DCT16X32: {
593
3.52M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.52M
                                 scratch_space);
595
3.52M
      break;
596
0
    }
597
2.14M
    case Type::DCT32X32: {
598
2.14M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
2.14M
                                 scratch_space);
600
2.14M
      break;
601
0
    }
602
47.5M
    case Type::DCT: {
603
47.5M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
47.5M
                               scratch_space);
605
47.5M
      break;
606
0
    }
607
21.2M
    case Type::AFV0: {
608
21.2M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
21.2M
      break;
610
0
    }
611
22.5M
    case Type::AFV1: {
612
22.5M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
22.5M
      break;
614
0
    }
615
20.9M
    case Type::AFV2: {
616
20.9M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
20.9M
      break;
618
0
    }
619
21.2M
    case Type::AFV3: {
620
21.2M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
21.2M
      break;
622
0
    }
623
413k
    case Type::DCT64X64: {
624
413k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
413k
                                 scratch_space);
626
413k
      break;
627
0
    }
628
1.06M
    case Type::DCT64X32: {
629
1.06M
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
1.06M
                                 scratch_space);
631
1.06M
      break;
632
0
    }
633
706k
    case Type::DCT32X64: {
634
706k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
706k
                                 scratch_space);
636
706k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
296M
  }
669
296M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
9.61M
                                          float* JXL_RESTRICT scratch_space) {
462
9.61M
  using Type = AcStrategyType;
463
9.61M
  switch (strategy) {
464
1.09M
    case Type::IDENTITY: {
465
3.29M
      for (size_t y = 0; y < 2; y++) {
466
6.58M
        for (size_t x = 0; x < 2; x++) {
467
4.38M
          float block_dc = 0;
468
21.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
87.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
70.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
70.1M
            }
472
17.5M
          }
473
4.38M
          block_dc *= 1.0f / 16;
474
21.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
87.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
70.1M
              if (ix == 1 && iy == 1) continue;
477
65.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
65.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
65.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
65.8M
            }
481
17.5M
          }
482
4.38M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.38M
          coefficients[y * 8 + x] = block_dc;
484
4.38M
        }
485
2.19M
      }
486
1.09M
      float block00 = coefficients[0];
487
1.09M
      float block01 = coefficients[1];
488
1.09M
      float block10 = coefficients[8];
489
1.09M
      float block11 = coefficients[9];
490
1.09M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.09M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.09M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.09M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.09M
      break;
495
0
    }
496
440k
    case Type::DCT8X4: {
497
1.32M
      for (size_t x = 0; x < 2; x++) {
498
881k
        HWY_ALIGN float block[4 * 8];
499
881k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
881k
                                 scratch_space);
501
4.40M
        for (size_t iy = 0; iy < 4; iy++) {
502
31.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.2M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.2M
          }
506
3.52M
        }
507
881k
      }
508
440k
      float block0 = coefficients[0];
509
440k
      float block1 = coefficients[8];
510
440k
      coefficients[0] = (block0 + block1) * 0.5f;
511
440k
      coefficients[8] = (block0 - block1) * 0.5f;
512
440k
      break;
513
0
    }
514
348k
    case Type::DCT4X8: {
515
1.04M
      for (size_t y = 0; y < 2; y++) {
516
696k
        HWY_ALIGN float block[4 * 8];
517
696k
        ComputeScaledDCT<4, 8>()(
518
696k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
696k
            scratch_space);
520
3.48M
        for (size_t iy = 0; iy < 4; iy++) {
521
25.0M
          for (size_t ix = 0; ix < 8; ix++) {
522
22.2M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
22.2M
          }
524
2.78M
        }
525
696k
      }
526
348k
      float block0 = coefficients[0];
527
348k
      float block1 = coefficients[8];
528
348k
      coefficients[0] = (block0 + block1) * 0.5f;
529
348k
      coefficients[8] = (block0 - block1) * 0.5f;
530
348k
      break;
531
0
    }
532
2.40k
    case Type::DCT4X4: {
533
7.20k
      for (size_t y = 0; y < 2; y++) {
534
14.4k
        for (size_t x = 0; x < 2; x++) {
535
9.60k
          HWY_ALIGN float block[4 * 4];
536
9.60k
          ComputeScaledDCT<4, 4>()(
537
9.60k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.60k
              block, scratch_space);
539
48.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
192k
            for (size_t ix = 0; ix < 4; ix++) {
541
153k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
153k
            }
543
38.4k
          }
544
9.60k
        }
545
4.80k
      }
546
2.40k
      float block00 = coefficients[0];
547
2.40k
      float block01 = coefficients[1];
548
2.40k
      float block10 = coefficients[8];
549
2.40k
      float block11 = coefficients[9];
550
2.40k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.40k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.40k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.40k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.40k
      break;
555
0
    }
556
1.16M
    case Type::DCT2X2: {
557
1.16M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.16M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.16M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.16M
      break;
561
0
    }
562
166k
    case Type::DCT16X16: {
563
166k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
166k
                                 scratch_space);
565
166k
      break;
566
0
    }
567
274k
    case Type::DCT16X8: {
568
274k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
274k
                                scratch_space);
570
274k
      break;
571
0
    }
572
436k
    case Type::DCT8X16: {
573
436k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
436k
                                scratch_space);
575
436k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
74.3k
    case Type::DCT32X16: {
588
74.3k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
74.3k
                                 scratch_space);
590
74.3k
      break;
591
0
    }
592
124k
    case Type::DCT16X32: {
593
124k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
124k
                                 scratch_space);
595
124k
      break;
596
0
    }
597
248k
    case Type::DCT32X32: {
598
248k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
248k
                                 scratch_space);
600
248k
      break;
601
0
    }
602
3.22M
    case Type::DCT: {
603
3.22M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
3.22M
                               scratch_space);
605
3.22M
      break;
606
0
    }
607
346k
    case Type::AFV0: {
608
346k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
346k
      break;
610
0
    }
611
994k
    case Type::AFV1: {
612
994k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
994k
      break;
614
0
    }
615
216k
    case Type::AFV2: {
616
216k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
216k
      break;
618
0
    }
619
366k
    case Type::AFV3: {
620
366k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
366k
      break;
622
0
    }
623
64.1k
    case Type::DCT64X64: {
624
64.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
64.1k
                                 scratch_space);
626
64.1k
      break;
627
0
    }
628
7.77k
    case Type::DCT64X32: {
629
7.77k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
7.77k
                                 scratch_space);
631
7.77k
      break;
632
0
    }
633
10.2k
    case Type::DCT32X64: {
634
10.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.2k
                                 scratch_space);
636
10.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
9.61M
  }
669
9.61M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
256M
                                          float* JXL_RESTRICT scratch_space) {
462
256M
  using Type = AcStrategyType;
463
256M
  switch (strategy) {
464
20.5M
    case Type::IDENTITY: {
465
61.6M
      for (size_t y = 0; y < 2; y++) {
466
123M
        for (size_t x = 0; x < 2; x++) {
467
82.1M
          float block_dc = 0;
468
410M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.64G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.31G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.31G
            }
472
328M
          }
473
82.1M
          block_dc *= 1.0f / 16;
474
410M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.64G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.31G
              if (ix == 1 && iy == 1) continue;
477
1.23G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.23G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.23G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.23G
            }
481
328M
          }
482
82.1M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
82.1M
          coefficients[y * 8 + x] = block_dc;
484
82.1M
        }
485
41.0M
      }
486
20.5M
      float block00 = coefficients[0];
487
20.5M
      float block01 = coefficients[1];
488
20.5M
      float block10 = coefficients[8];
489
20.5M
      float block11 = coefficients[9];
490
20.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
20.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
20.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
20.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
20.5M
      break;
495
0
    }
496
20.5M
    case Type::DCT8X4: {
497
61.6M
      for (size_t x = 0; x < 2; x++) {
498
41.0M
        HWY_ALIGN float block[4 * 8];
499
41.0M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
41.0M
                                 scratch_space);
501
205M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.47G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.31G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.31G
          }
506
164M
        }
507
41.0M
      }
508
20.5M
      float block0 = coefficients[0];
509
20.5M
      float block1 = coefficients[8];
510
20.5M
      coefficients[0] = (block0 + block1) * 0.5f;
511
20.5M
      coefficients[8] = (block0 - block1) * 0.5f;
512
20.5M
      break;
513
0
    }
514
20.5M
    case Type::DCT4X8: {
515
61.6M
      for (size_t y = 0; y < 2; y++) {
516
41.0M
        HWY_ALIGN float block[4 * 8];
517
41.0M
        ComputeScaledDCT<4, 8>()(
518
41.0M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
41.0M
            scratch_space);
520
205M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.47G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.31G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.31G
          }
524
164M
        }
525
41.0M
      }
526
20.5M
      float block0 = coefficients[0];
527
20.5M
      float block1 = coefficients[8];
528
20.5M
      coefficients[0] = (block0 + block1) * 0.5f;
529
20.5M
      coefficients[8] = (block0 - block1) * 0.5f;
530
20.5M
      break;
531
0
    }
532
20.5M
    case Type::DCT4X4: {
533
61.6M
      for (size_t y = 0; y < 2; y++) {
534
123M
        for (size_t x = 0; x < 2; x++) {
535
82.1M
          HWY_ALIGN float block[4 * 4];
536
82.1M
          ComputeScaledDCT<4, 4>()(
537
82.1M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
82.1M
              block, scratch_space);
539
410M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.64G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.31G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.31G
            }
543
328M
          }
544
82.1M
        }
545
41.0M
      }
546
20.5M
      float block00 = coefficients[0];
547
20.5M
      float block01 = coefficients[1];
548
20.5M
      float block10 = coefficients[8];
549
20.5M
      float block11 = coefficients[9];
550
20.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
20.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
20.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
20.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
20.5M
      break;
555
0
    }
556
20.5M
    case Type::DCT2X2: {
557
20.5M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
20.5M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
20.5M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
20.5M
      break;
561
0
    }
562
8.38M
    case Type::DCT16X16: {
563
8.38M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
8.38M
                                 scratch_space);
565
8.38M
      break;
566
0
    }
567
16.4M
    case Type::DCT16X8: {
568
16.4M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
16.4M
                                scratch_space);
570
16.4M
      break;
571
0
    }
572
16.5M
    case Type::DCT8X16: {
573
16.5M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
16.5M
                                scratch_space);
575
16.5M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.22M
    case Type::DCT32X16: {
588
3.22M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.22M
                                 scratch_space);
590
3.22M
      break;
591
0
    }
592
3.27M
    case Type::DCT16X32: {
593
3.27M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.27M
                                 scratch_space);
595
3.27M
      break;
596
0
    }
597
1.65M
    case Type::DCT32X32: {
598
1.65M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.65M
                                 scratch_space);
600
1.65M
      break;
601
0
    }
602
20.5M
    case Type::DCT: {
603
20.5M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
20.5M
                               scratch_space);
605
20.5M
      break;
606
0
    }
607
20.5M
    case Type::AFV0: {
608
20.5M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
20.5M
      break;
610
0
    }
611
20.5M
    case Type::AFV1: {
612
20.5M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
20.5M
      break;
614
0
    }
615
20.5M
    case Type::AFV2: {
616
20.5M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
20.5M
      break;
618
0
    }
619
20.5M
    case Type::AFV3: {
620
20.5M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
20.5M
      break;
622
0
    }
623
285k
    case Type::DCT64X64: {
624
285k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
285k
                                 scratch_space);
626
285k
      break;
627
0
    }
628
1.04M
    case Type::DCT64X32: {
629
1.04M
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
1.04M
                                 scratch_space);
631
1.04M
      break;
632
0
    }
633
685k
    case Type::DCT32X64: {
634
685k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
685k
                                 scratch_space);
636
685k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
256M
  }
669
256M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
30.1M
                                          float* JXL_RESTRICT scratch_space) {
462
30.1M
  using Type = AcStrategyType;
463
30.1M
  switch (strategy) {
464
1.09M
    case Type::IDENTITY: {
465
3.29M
      for (size_t y = 0; y < 2; y++) {
466
6.58M
        for (size_t x = 0; x < 2; x++) {
467
4.38M
          float block_dc = 0;
468
21.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
87.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
70.1M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
70.1M
            }
472
17.5M
          }
473
4.38M
          block_dc *= 1.0f / 16;
474
21.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
87.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
70.1M
              if (ix == 1 && iy == 1) continue;
477
65.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
65.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
65.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
65.8M
            }
481
17.5M
          }
482
4.38M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.38M
          coefficients[y * 8 + x] = block_dc;
484
4.38M
        }
485
2.19M
      }
486
1.09M
      float block00 = coefficients[0];
487
1.09M
      float block01 = coefficients[1];
488
1.09M
      float block10 = coefficients[8];
489
1.09M
      float block11 = coefficients[9];
490
1.09M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.09M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.09M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.09M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.09M
      break;
495
0
    }
496
440k
    case Type::DCT8X4: {
497
1.32M
      for (size_t x = 0; x < 2; x++) {
498
881k
        HWY_ALIGN float block[4 * 8];
499
881k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
881k
                                 scratch_space);
501
4.40M
        for (size_t iy = 0; iy < 4; iy++) {
502
31.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
28.2M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
28.2M
          }
506
3.52M
        }
507
881k
      }
508
440k
      float block0 = coefficients[0];
509
440k
      float block1 = coefficients[8];
510
440k
      coefficients[0] = (block0 + block1) * 0.5f;
511
440k
      coefficients[8] = (block0 - block1) * 0.5f;
512
440k
      break;
513
0
    }
514
348k
    case Type::DCT4X8: {
515
1.04M
      for (size_t y = 0; y < 2; y++) {
516
696k
        HWY_ALIGN float block[4 * 8];
517
696k
        ComputeScaledDCT<4, 8>()(
518
696k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
696k
            scratch_space);
520
3.48M
        for (size_t iy = 0; iy < 4; iy++) {
521
25.0M
          for (size_t ix = 0; ix < 8; ix++) {
522
22.2M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
22.2M
          }
524
2.78M
        }
525
696k
      }
526
348k
      float block0 = coefficients[0];
527
348k
      float block1 = coefficients[8];
528
348k
      coefficients[0] = (block0 + block1) * 0.5f;
529
348k
      coefficients[8] = (block0 - block1) * 0.5f;
530
348k
      break;
531
0
    }
532
2.40k
    case Type::DCT4X4: {
533
7.20k
      for (size_t y = 0; y < 2; y++) {
534
14.4k
        for (size_t x = 0; x < 2; x++) {
535
9.60k
          HWY_ALIGN float block[4 * 4];
536
9.60k
          ComputeScaledDCT<4, 4>()(
537
9.60k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.60k
              block, scratch_space);
539
48.0k
          for (size_t iy = 0; iy < 4; iy++) {
540
192k
            for (size_t ix = 0; ix < 4; ix++) {
541
153k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
153k
            }
543
38.4k
          }
544
9.60k
        }
545
4.80k
      }
546
2.40k
      float block00 = coefficients[0];
547
2.40k
      float block01 = coefficients[1];
548
2.40k
      float block10 = coefficients[8];
549
2.40k
      float block11 = coefficients[9];
550
2.40k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.40k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.40k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.40k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.40k
      break;
555
0
    }
556
1.16M
    case Type::DCT2X2: {
557
1.16M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.16M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.16M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.16M
      break;
561
0
    }
562
166k
    case Type::DCT16X16: {
563
166k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
166k
                                 scratch_space);
565
166k
      break;
566
0
    }
567
274k
    case Type::DCT16X8: {
568
274k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
274k
                                scratch_space);
570
274k
      break;
571
0
    }
572
436k
    case Type::DCT8X16: {
573
436k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
436k
                                scratch_space);
575
436k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
74.3k
    case Type::DCT32X16: {
588
74.3k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
74.3k
                                 scratch_space);
590
74.3k
      break;
591
0
    }
592
124k
    case Type::DCT16X32: {
593
124k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
124k
                                 scratch_space);
595
124k
      break;
596
0
    }
597
248k
    case Type::DCT32X32: {
598
248k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
248k
                                 scratch_space);
600
248k
      break;
601
0
    }
602
23.7M
    case Type::DCT: {
603
23.7M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
23.7M
                               scratch_space);
605
23.7M
      break;
606
0
    }
607
346k
    case Type::AFV0: {
608
346k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
346k
      break;
610
0
    }
611
994k
    case Type::AFV1: {
612
994k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
994k
      break;
614
0
    }
615
216k
    case Type::AFV2: {
616
216k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
216k
      break;
618
0
    }
619
366k
    case Type::AFV3: {
620
366k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
366k
      break;
622
0
    }
623
64.1k
    case Type::DCT64X64: {
624
64.1k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
64.1k
                                 scratch_space);
626
64.1k
      break;
627
0
    }
628
7.77k
    case Type::DCT64X32: {
629
7.77k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
7.77k
                                 scratch_space);
631
7.77k
      break;
632
0
    }
633
10.2k
    case Type::DCT32X64: {
634
10.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
10.2k
                                 scratch_space);
636
10.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
30.1M
  }
669
30.1M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
39.7M
                                              float* scratch_space) {
676
39.7M
  using Type = AcStrategyType;
677
39.7M
  switch (strategy) {
678
549k
    case Type::DCT16X8: {
679
549k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
549k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
549k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
549k
      break;
683
0
    }
684
872k
    case Type::DCT8X16: {
685
872k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
872k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
872k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
872k
      break;
689
0
    }
690
332k
    case Type::DCT16X16: {
691
332k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
332k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
332k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
332k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
148k
    case Type::DCT32X16: {
709
148k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
148k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
148k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
148k
      break;
713
0
    }
714
249k
    case Type::DCT16X32: {
715
249k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
249k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
249k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
249k
      break;
719
0
    }
720
496k
    case Type::DCT32X32: {
721
496k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
496k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
496k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
496k
      break;
725
0
    }
726
15.5k
    case Type::DCT64X32: {
727
15.5k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
15.5k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
15.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
15.5k
      break;
731
0
    }
732
20.5k
    case Type::DCT32X64: {
733
20.5k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
20.5k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
20.5k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
20.5k
      break;
737
0
    }
738
128k
    case Type::DCT64X64: {
739
128k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
128k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
128k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
128k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
26.9M
    case Type::DCT:
787
29.3M
    case Type::DCT2X2:
788
29.3M
    case Type::DCT4X4:
789
30.0M
    case Type::DCT4X8:
790
30.9M
    case Type::DCT8X4:
791
31.6M
    case Type::AFV0:
792
33.5M
    case Type::AFV1:
793
34.0M
    case Type::AFV2:
794
34.7M
    case Type::AFV3:
795
36.9M
    case Type::IDENTITY:
796
36.9M
      dc[0] = block[0];
797
36.9M
      break;
798
39.7M
  }
799
39.7M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
9.61M
                                              float* scratch_space) {
676
9.61M
  using Type = AcStrategyType;
677
9.61M
  switch (strategy) {
678
274k
    case Type::DCT16X8: {
679
274k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
274k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
274k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
274k
      break;
683
0
    }
684
436k
    case Type::DCT8X16: {
685
436k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
436k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
436k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
436k
      break;
689
0
    }
690
166k
    case Type::DCT16X16: {
691
166k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
166k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
166k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
166k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
74.3k
    case Type::DCT32X16: {
709
74.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
74.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
74.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
74.3k
      break;
713
0
    }
714
124k
    case Type::DCT16X32: {
715
124k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
124k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
124k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
124k
      break;
719
0
    }
720
248k
    case Type::DCT32X32: {
721
248k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
248k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
248k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
248k
      break;
725
0
    }
726
7.77k
    case Type::DCT64X32: {
727
7.77k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
7.77k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
7.77k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
7.77k
      break;
731
0
    }
732
10.2k
    case Type::DCT32X64: {
733
10.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.2k
      break;
737
0
    }
738
64.1k
    case Type::DCT64X64: {
739
64.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
64.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
64.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
64.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
3.22M
    case Type::DCT:
787
4.39M
    case Type::DCT2X2:
788
4.39M
    case Type::DCT4X4:
789
4.74M
    case Type::DCT4X8:
790
5.18M
    case Type::DCT8X4:
791
5.53M
    case Type::AFV0:
792
6.52M
    case Type::AFV1:
793
6.74M
    case Type::AFV2:
794
7.11M
    case Type::AFV3:
795
8.20M
    case Type::IDENTITY:
796
8.20M
      dc[0] = block[0];
797
8.20M
      break;
798
9.61M
  }
799
9.61M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
30.1M
                                              float* scratch_space) {
676
30.1M
  using Type = AcStrategyType;
677
30.1M
  switch (strategy) {
678
274k
    case Type::DCT16X8: {
679
274k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
274k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
274k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
274k
      break;
683
0
    }
684
436k
    case Type::DCT8X16: {
685
436k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
436k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
436k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
436k
      break;
689
0
    }
690
166k
    case Type::DCT16X16: {
691
166k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
166k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
166k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
166k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
74.3k
    case Type::DCT32X16: {
709
74.3k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
74.3k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
74.3k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
74.3k
      break;
713
0
    }
714
124k
    case Type::DCT16X32: {
715
124k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
124k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
124k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
124k
      break;
719
0
    }
720
248k
    case Type::DCT32X32: {
721
248k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
248k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
248k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
248k
      break;
725
0
    }
726
7.77k
    case Type::DCT64X32: {
727
7.77k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
7.77k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
7.77k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
7.77k
      break;
731
0
    }
732
10.2k
    case Type::DCT32X64: {
733
10.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
10.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
10.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
10.2k
      break;
737
0
    }
738
64.1k
    case Type::DCT64X64: {
739
64.1k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
64.1k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
64.1k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
64.1k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
23.7M
    case Type::DCT:
787
24.9M
    case Type::DCT2X2:
788
24.9M
    case Type::DCT4X4:
789
25.2M
    case Type::DCT4X8:
790
25.7M
    case Type::DCT8X4:
791
26.0M
    case Type::AFV0:
792
27.0M
    case Type::AFV1:
793
27.2M
    case Type::AFV2:
794
27.6M
    case Type::AFV3:
795
28.7M
    case Type::IDENTITY:
796
28.7M
      dc[0] = block[0];
797
28.7M
      break;
798
30.1M
  }
799
30.1M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_