Coverage Report

Created: 2026-04-01 07:49

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libjxl/lib/jxl/enc_transforms-inl.h
Line
Count
Source
1
// Copyright (c) the JPEG XL Project Authors. All rights reserved.
2
//
3
// Use of this source code is governed by a BSD-style
4
// license that can be found in the LICENSE file.
5
6
#include "lib/jxl/base/compiler_specific.h"
7
#include "lib/jxl/frame_dimensions.h"
8
9
#if defined(LIB_JXL_ENC_TRANSFORMS_INL_H_) == defined(HWY_TARGET_TOGGLE)
10
#ifdef LIB_JXL_ENC_TRANSFORMS_INL_H_
11
#undef LIB_JXL_ENC_TRANSFORMS_INL_H_
12
#else
13
#define LIB_JXL_ENC_TRANSFORMS_INL_H_
14
#endif
15
16
#include <cstddef>
17
#include <cstdint>
18
#include <hwy/highway.h>
19
20
#include "lib/jxl/ac_strategy.h"
21
#include "lib/jxl/dct-inl.h"
22
#include "lib/jxl/dct_scales.h"
23
24
HWY_BEFORE_NAMESPACE();
25
namespace jxl {
26
27
enum class AcStrategyType : uint32_t;
28
29
namespace HWY_NAMESPACE {
30
namespace {
31
32
constexpr size_t kMaxBlocks = 32;
33
34
// Inverse of ReinterpretingDCT.
35
template <size_t DCT_ROWS, size_t DCT_COLS, size_t LF_ROWS, size_t LF_COLS,
36
          size_t ROWS, size_t COLS>
37
HWY_INLINE void ReinterpretingIDCT(const float* input,
38
                                   const size_t input_stride, float* output,
39
2.93M
                                   const size_t output_stride, float* scratch) {
40
2.93M
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
2.93M
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
2.93M
  float* block = scratch;
43
2.93M
  if (ROWS < COLS) {
44
2.75M
    for (size_t y = 0; y < LF_ROWS; y++) {
45
6.34M
      for (size_t x = 0; x < LF_COLS; x++) {
46
4.79M
        block[y * COLS + x] = input[y * input_stride + x] *
47
4.79M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
4.79M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
4.79M
      }
50
1.55M
    }
51
1.73M
  } else {
52
6.26M
    for (size_t y = 0; y < LF_COLS; y++) {
53
23.8M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
19.3M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
19.3M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
19.3M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
19.3M
      }
58
4.53M
    }
59
1.73M
  }
60
61
2.93M
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
2.93M
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
2.93M
                                  scratch_space);
64
2.93M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
297k
                                   const size_t output_stride, float* scratch) {
40
297k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
297k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
297k
  float* block = scratch;
43
297k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
297k
  } else {
52
594k
    for (size_t y = 0; y < LF_COLS; y++) {
53
891k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
594k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
594k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
594k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
594k
      }
58
297k
    }
59
297k
  }
60
61
297k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
297k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
297k
                                  scratch_space);
64
297k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
444k
                                   const size_t output_stride, float* scratch) {
40
444k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
444k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
444k
  float* block = scratch;
43
444k
  if (ROWS < COLS) {
44
889k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.33M
      for (size_t x = 0; x < LF_COLS; x++) {
46
889k
        block[y * COLS + x] = input[y * input_stride + x] *
47
889k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
889k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
889k
      }
50
444k
    }
51
444k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
444k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
444k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
444k
                                  scratch_space);
64
444k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
169k
                                   const size_t output_stride, float* scratch) {
40
169k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
169k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
169k
  float* block = scratch;
43
169k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
169k
  } else {
52
507k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.01M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
676k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
676k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
676k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
676k
      }
58
338k
    }
59
169k
  }
60
61
169k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
169k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
169k
                                  scratch_space);
64
169k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
93.4k
                                   const size_t output_stride, float* scratch) {
40
93.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
93.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
93.4k
  float* block = scratch;
43
93.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
93.4k
  } else {
52
280k
    for (size_t y = 0; y < LF_COLS; y++) {
53
934k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
747k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
747k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
747k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
747k
      }
58
186k
    }
59
93.4k
  }
60
61
93.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
93.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
93.4k
                                  scratch_space);
64
93.4k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
143k
                                   const size_t output_stride, float* scratch) {
40
143k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
143k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
143k
  float* block = scratch;
43
143k
  if (ROWS < COLS) {
44
430k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.43M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.14M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.14M
      }
50
287k
    }
51
143k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
143k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
143k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
143k
                                  scratch_space);
64
143k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
244k
                                   const size_t output_stride, float* scratch) {
40
244k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
244k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
244k
  float* block = scratch;
43
244k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
244k
  } else {
52
1.22M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.88M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.90M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.90M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.90M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.90M
      }
58
976k
    }
59
244k
  }
60
61
244k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
244k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
244k
                                  scratch_space);
64
244k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
8.17k
                                   const size_t output_stride, float* scratch) {
40
8.17k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
8.17k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
8.17k
  float* block = scratch;
43
8.17k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
8.17k
  } else {
52
40.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
294k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
261k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
261k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
261k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
261k
      }
58
32.7k
    }
59
8.17k
  }
60
61
8.17k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
8.17k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
8.17k
                                  scratch_space);
64
8.17k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.2k
                                   const size_t output_stride, float* scratch) {
40
11.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.2k
  float* block = scratch;
43
11.2k
  if (ROWS < COLS) {
44
56.1k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
404k
      for (size_t x = 0; x < LF_COLS; x++) {
46
359k
        block[y * COLS + x] = input[y * input_stride + x] *
47
359k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
359k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
359k
      }
50
44.9k
    }
51
11.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
11.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.2k
                                  scratch_space);
64
11.2k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
54.6k
                                   const size_t output_stride, float* scratch) {
40
54.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
54.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
54.6k
  float* block = scratch;
43
54.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
54.6k
  } else {
52
491k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.93M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.49M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.49M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.49M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.49M
      }
58
436k
    }
59
54.6k
  }
60
61
54.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
54.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
54.6k
                                  scratch_space);
64
54.6k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
297k
                                   const size_t output_stride, float* scratch) {
40
297k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
297k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
297k
  float* block = scratch;
43
297k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
297k
  } else {
52
594k
    for (size_t y = 0; y < LF_COLS; y++) {
53
891k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
594k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
594k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
594k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
594k
      }
58
297k
    }
59
297k
  }
60
61
297k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
297k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
297k
                                  scratch_space);
64
297k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
444k
                                   const size_t output_stride, float* scratch) {
40
444k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
444k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
444k
  float* block = scratch;
43
444k
  if (ROWS < COLS) {
44
889k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.33M
      for (size_t x = 0; x < LF_COLS; x++) {
46
889k
        block[y * COLS + x] = input[y * input_stride + x] *
47
889k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
889k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
889k
      }
50
444k
    }
51
444k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
444k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
444k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
444k
                                  scratch_space);
64
444k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
169k
                                   const size_t output_stride, float* scratch) {
40
169k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
169k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
169k
  float* block = scratch;
43
169k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
169k
  } else {
52
507k
    for (size_t y = 0; y < LF_COLS; y++) {
53
1.01M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
676k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
676k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
676k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
676k
      }
58
338k
    }
59
169k
  }
60
61
169k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
169k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
169k
                                  scratch_space);
64
169k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
93.4k
                                   const size_t output_stride, float* scratch) {
40
93.4k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
93.4k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
93.4k
  float* block = scratch;
43
93.4k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
93.4k
  } else {
52
280k
    for (size_t y = 0; y < LF_COLS; y++) {
53
934k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
747k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
747k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
747k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
747k
      }
58
186k
    }
59
93.4k
  }
60
61
93.4k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
93.4k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
93.4k
                                  scratch_space);
64
93.4k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
143k
                                   const size_t output_stride, float* scratch) {
40
143k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
143k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
143k
  float* block = scratch;
43
143k
  if (ROWS < COLS) {
44
430k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
1.43M
      for (size_t x = 0; x < LF_COLS; x++) {
46
1.14M
        block[y * COLS + x] = input[y * input_stride + x] *
47
1.14M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
1.14M
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
1.14M
      }
50
287k
    }
51
143k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
143k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
143k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
143k
                                  scratch_space);
64
143k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
244k
                                   const size_t output_stride, float* scratch) {
40
244k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
244k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
244k
  float* block = scratch;
43
244k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
244k
  } else {
52
1.22M
    for (size_t y = 0; y < LF_COLS; y++) {
53
4.88M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.90M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.90M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.90M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.90M
      }
58
976k
    }
59
244k
  }
60
61
244k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
244k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
244k
                                  scratch_space);
64
244k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
8.17k
                                   const size_t output_stride, float* scratch) {
40
8.17k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
8.17k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
8.17k
  float* block = scratch;
43
8.17k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
8.17k
  } else {
52
40.8k
    for (size_t y = 0; y < LF_COLS; y++) {
53
294k
      for (size_t x = 0; x < LF_ROWS; x++) {
54
261k
        block[y * ROWS + x] = input[y * input_stride + x] *
55
261k
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
261k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
261k
      }
58
32.7k
    }
59
8.17k
  }
60
61
8.17k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
8.17k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
8.17k
                                  scratch_space);
64
8.17k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
11.2k
                                   const size_t output_stride, float* scratch) {
40
11.2k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
11.2k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
11.2k
  float* block = scratch;
43
11.2k
  if (ROWS < COLS) {
44
56.1k
    for (size_t y = 0; y < LF_ROWS; y++) {
45
404k
      for (size_t x = 0; x < LF_COLS; x++) {
46
359k
        block[y * COLS + x] = input[y * input_stride + x] *
47
359k
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
359k
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
359k
      }
50
44.9k
    }
51
11.2k
  } else {
52
0
    for (size_t y = 0; y < LF_COLS; y++) {
53
0
      for (size_t x = 0; x < LF_ROWS; x++) {
54
0
        block[y * ROWS + x] = input[y * input_stride + x] *
55
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
0
      }
58
0
    }
59
0
  }
60
61
11.2k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
11.2k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
11.2k
                                  scratch_space);
64
11.2k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Line
Count
Source
39
54.6k
                                   const size_t output_stride, float* scratch) {
40
54.6k
  static_assert(ROWS <= kMaxBlocks, "Unsupported block size");
41
54.6k
  static_assert(COLS <= kMaxBlocks, "Unsupported block size");
42
54.6k
  float* block = scratch;
43
54.6k
  if (ROWS < COLS) {
44
0
    for (size_t y = 0; y < LF_ROWS; y++) {
45
0
      for (size_t x = 0; x < LF_COLS; x++) {
46
0
        block[y * COLS + x] = input[y * input_stride + x] *
47
0
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(y) *
48
0
                              DCTTotalResampleScale<DCT_COLS, COLS>(x);
49
0
      }
50
0
    }
51
54.6k
  } else {
52
491k
    for (size_t y = 0; y < LF_COLS; y++) {
53
3.93M
      for (size_t x = 0; x < LF_ROWS; x++) {
54
3.49M
        block[y * ROWS + x] = input[y * input_stride + x] *
55
3.49M
                              DCTTotalResampleScale<DCT_COLS, COLS>(y) *
56
3.49M
                              DCTTotalResampleScale<DCT_ROWS, ROWS>(x);
57
3.49M
      }
58
436k
    }
59
54.6k
  }
60
61
54.6k
  float* scratch_space = scratch + kMaxBlocks * kMaxBlocks;
62
54.6k
  ComputeScaledIDCT<ROWS, COLS>()(block, DCTTo(output, output_stride),
63
54.6k
                                  scratch_space);
64
54.6k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 8ul, 2ul, 1ul, 2ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 16ul, 1ul, 2ul, 1ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 16ul, 2ul, 2ul, 2ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 8ul, 4ul, 1ul, 4ul, 1ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<8ul, 32ul, 1ul, 4ul, 1ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 16ul, 4ul, 2ul, 4ul, 2ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<16ul, 32ul, 2ul, 4ul, 2ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 32ul, 4ul, 4ul, 4ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 32ul, 8ul, 4ul, 8ul, 4ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<32ul, 64ul, 4ul, 8ul, 4ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 64ul, 8ul, 8ul, 8ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 64ul, 16ul, 8ul, 16ul, 8ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<64ul, 128ul, 8ul, 16ul, 8ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 128ul, 16ul, 16ul, 16ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 128ul, 32ul, 16ul, 32ul, 16ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<128ul, 256ul, 16ul, 32ul, 16ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::ReinterpretingIDCT<256ul, 256ul, 32ul, 32ul, 32ul, 32ul>(float const*, unsigned long, float*, unsigned long, float*)
65
66
template <size_t S>
67
64.6M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
64.6M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
64.6M
  static_assert(S % 2 == 0, "S should be even");
70
64.6M
  float temp[kDCTBlockSize];
71
64.6M
  constexpr size_t num_2x2 = S / 2;
72
215M
  for (size_t y = 0; y < num_2x2; y++) {
73
603M
    for (size_t x = 0; x < num_2x2; x++) {
74
452M
      float c00 = block[y * 2 * stride + x * 2];
75
452M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
452M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
452M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
452M
      float r00 = c00 + c01 + c10 + c11;
79
452M
      float r01 = c00 + c01 - c10 - c11;
80
452M
      float r10 = c00 - c01 + c10 - c11;
81
452M
      float r11 = c00 - c01 - c10 + c11;
82
452M
      r00 *= 0.25f;
83
452M
      r01 *= 0.25f;
84
452M
      r10 *= 0.25f;
85
452M
      r11 *= 0.25f;
86
452M
      temp[y * kBlockDim + x] = r00;
87
452M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
452M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
452M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
452M
    }
91
150M
  }
92
366M
  for (size_t y = 0; y < S; y++) {
93
2.11G
    for (size_t x = 0; x < S; x++) {
94
1.81G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.81G
    }
96
301M
  }
97
64.6M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
5.07M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.2M
      float c00 = block[y * 2 * stride + x * 2];
75
16.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.2M
      float r00 = c00 + c01 + c10 + c11;
79
16.2M
      float r01 = c00 + c01 - c10 - c11;
80
16.2M
      float r10 = c00 - c01 + c10 - c11;
81
16.2M
      float r11 = c00 - c01 - c10 + c11;
82
16.2M
      r00 *= 0.25f;
83
16.2M
      r01 *= 0.25f;
84
16.2M
      r10 *= 0.25f;
85
16.2M
      r11 *= 0.25f;
86
16.2M
      temp[y * kBlockDim + x] = r00;
87
16.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.2M
    }
91
4.05M
  }
92
9.13M
  for (size_t y = 0; y < S; y++) {
93
73.0M
    for (size_t x = 0; x < S; x++) {
94
64.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
64.9M
    }
96
8.11M
  }
97
1.01M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
3.04M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.08M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.05M
      float c00 = block[y * 2 * stride + x * 2];
75
4.05M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.05M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.05M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.05M
      float r00 = c00 + c01 + c10 + c11;
79
4.05M
      float r01 = c00 + c01 - c10 - c11;
80
4.05M
      float r10 = c00 - c01 + c10 - c11;
81
4.05M
      float r11 = c00 - c01 - c10 + c11;
82
4.05M
      r00 *= 0.25f;
83
4.05M
      r01 *= 0.25f;
84
4.05M
      r10 *= 0.25f;
85
4.05M
      r11 *= 0.25f;
86
4.05M
      temp[y * kBlockDim + x] = r00;
87
4.05M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.05M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.05M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.05M
    }
91
2.02M
  }
92
5.07M
  for (size_t y = 0; y < S; y++) {
93
20.2M
    for (size_t x = 0; x < S; x++) {
94
16.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.2M
    }
96
4.05M
  }
97
1.01M
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
2.02M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.02M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.01M
      float c00 = block[y * 2 * stride + x * 2];
75
1.01M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.01M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.01M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.01M
      float r00 = c00 + c01 + c10 + c11;
79
1.01M
      float r01 = c00 + c01 - c10 - c11;
80
1.01M
      float r10 = c00 - c01 + c10 - c11;
81
1.01M
      float r11 = c00 - c01 - c10 + c11;
82
1.01M
      r00 *= 0.25f;
83
1.01M
      r01 *= 0.25f;
84
1.01M
      r10 *= 0.25f;
85
1.01M
      r11 *= 0.25f;
86
1.01M
      temp[y * kBlockDim + x] = r00;
87
1.01M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.01M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.01M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.01M
    }
91
1.01M
  }
92
3.04M
  for (size_t y = 0; y < S; y++) {
93
6.08M
    for (size_t x = 0; x < S; x++) {
94
4.05M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.05M
    }
96
2.02M
  }
97
1.01M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
19.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
19.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
19.5M
  static_assert(S % 2 == 0, "S should be even");
70
19.5M
  float temp[kDCTBlockSize];
71
19.5M
  constexpr size_t num_2x2 = S / 2;
72
97.6M
  for (size_t y = 0; y < num_2x2; y++) {
73
390M
    for (size_t x = 0; x < num_2x2; x++) {
74
312M
      float c00 = block[y * 2 * stride + x * 2];
75
312M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
312M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
312M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
312M
      float r00 = c00 + c01 + c10 + c11;
79
312M
      float r01 = c00 + c01 - c10 - c11;
80
312M
      float r10 = c00 - c01 + c10 - c11;
81
312M
      float r11 = c00 - c01 - c10 + c11;
82
312M
      r00 *= 0.25f;
83
312M
      r01 *= 0.25f;
84
312M
      r10 *= 0.25f;
85
312M
      r11 *= 0.25f;
86
312M
      temp[y * kBlockDim + x] = r00;
87
312M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
312M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
312M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
312M
    }
91
78.0M
  }
92
175M
  for (size_t y = 0; y < S; y++) {
93
1.40G
    for (size_t x = 0; x < S; x++) {
94
1.24G
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
1.24G
    }
96
156M
  }
97
19.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
19.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
19.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
19.5M
  static_assert(S % 2 == 0, "S should be even");
70
19.5M
  float temp[kDCTBlockSize];
71
19.5M
  constexpr size_t num_2x2 = S / 2;
72
58.5M
  for (size_t y = 0; y < num_2x2; y++) {
73
117M
    for (size_t x = 0; x < num_2x2; x++) {
74
78.0M
      float c00 = block[y * 2 * stride + x * 2];
75
78.0M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
78.0M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
78.0M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
78.0M
      float r00 = c00 + c01 + c10 + c11;
79
78.0M
      float r01 = c00 + c01 - c10 - c11;
80
78.0M
      float r10 = c00 - c01 + c10 - c11;
81
78.0M
      float r11 = c00 - c01 - c10 + c11;
82
78.0M
      r00 *= 0.25f;
83
78.0M
      r01 *= 0.25f;
84
78.0M
      r10 *= 0.25f;
85
78.0M
      r11 *= 0.25f;
86
78.0M
      temp[y * kBlockDim + x] = r00;
87
78.0M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
78.0M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
78.0M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
78.0M
    }
91
39.0M
  }
92
97.6M
  for (size_t y = 0; y < S; y++) {
93
390M
    for (size_t x = 0; x < S; x++) {
94
312M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
312M
    }
96
78.0M
  }
97
19.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
19.5M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
19.5M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
19.5M
  static_assert(S % 2 == 0, "S should be even");
70
19.5M
  float temp[kDCTBlockSize];
71
19.5M
  constexpr size_t num_2x2 = S / 2;
72
39.0M
  for (size_t y = 0; y < num_2x2; y++) {
73
39.0M
    for (size_t x = 0; x < num_2x2; x++) {
74
19.5M
      float c00 = block[y * 2 * stride + x * 2];
75
19.5M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
19.5M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
19.5M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
19.5M
      float r00 = c00 + c01 + c10 + c11;
79
19.5M
      float r01 = c00 + c01 - c10 - c11;
80
19.5M
      float r10 = c00 - c01 + c10 - c11;
81
19.5M
      float r11 = c00 - c01 - c10 + c11;
82
19.5M
      r00 *= 0.25f;
83
19.5M
      r01 *= 0.25f;
84
19.5M
      r10 *= 0.25f;
85
19.5M
      r11 *= 0.25f;
86
19.5M
      temp[y * kBlockDim + x] = r00;
87
19.5M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
19.5M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
19.5M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
19.5M
    }
91
19.5M
  }
92
58.5M
  for (size_t y = 0; y < S; y++) {
93
117M
    for (size_t x = 0; x < S; x++) {
94
78.0M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
78.0M
    }
96
39.0M
  }
97
19.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
5.07M
  for (size_t y = 0; y < num_2x2; y++) {
73
20.2M
    for (size_t x = 0; x < num_2x2; x++) {
74
16.2M
      float c00 = block[y * 2 * stride + x * 2];
75
16.2M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
16.2M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
16.2M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
16.2M
      float r00 = c00 + c01 + c10 + c11;
79
16.2M
      float r01 = c00 + c01 - c10 - c11;
80
16.2M
      float r10 = c00 - c01 + c10 - c11;
81
16.2M
      float r11 = c00 - c01 - c10 + c11;
82
16.2M
      r00 *= 0.25f;
83
16.2M
      r01 *= 0.25f;
84
16.2M
      r10 *= 0.25f;
85
16.2M
      r11 *= 0.25f;
86
16.2M
      temp[y * kBlockDim + x] = r00;
87
16.2M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
16.2M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
16.2M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
16.2M
    }
91
4.05M
  }
92
9.13M
  for (size_t y = 0; y < S; y++) {
93
73.0M
    for (size_t x = 0; x < S; x++) {
94
64.9M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
64.9M
    }
96
8.11M
  }
97
1.01M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
3.04M
  for (size_t y = 0; y < num_2x2; y++) {
73
6.08M
    for (size_t x = 0; x < num_2x2; x++) {
74
4.05M
      float c00 = block[y * 2 * stride + x * 2];
75
4.05M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
4.05M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
4.05M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
4.05M
      float r00 = c00 + c01 + c10 + c11;
79
4.05M
      float r01 = c00 + c01 - c10 - c11;
80
4.05M
      float r10 = c00 - c01 + c10 - c11;
81
4.05M
      float r11 = c00 - c01 - c10 + c11;
82
4.05M
      r00 *= 0.25f;
83
4.05M
      r01 *= 0.25f;
84
4.05M
      r10 *= 0.25f;
85
4.05M
      r11 *= 0.25f;
86
4.05M
      temp[y * kBlockDim + x] = r00;
87
4.05M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
4.05M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
4.05M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
4.05M
    }
91
2.02M
  }
92
5.07M
  for (size_t y = 0; y < S; y++) {
93
20.2M
    for (size_t x = 0; x < S; x++) {
94
16.2M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
16.2M
    }
96
4.05M
  }
97
1.01M
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Line
Count
Source
67
1.01M
void DCT2TopBlock(const float* block, size_t stride, float* out) {
68
1.01M
  static_assert(kBlockDim % S == 0, "S should be a divisor of kBlockDim");
69
1.01M
  static_assert(S % 2 == 0, "S should be even");
70
1.01M
  float temp[kDCTBlockSize];
71
1.01M
  constexpr size_t num_2x2 = S / 2;
72
2.02M
  for (size_t y = 0; y < num_2x2; y++) {
73
2.02M
    for (size_t x = 0; x < num_2x2; x++) {
74
1.01M
      float c00 = block[y * 2 * stride + x * 2];
75
1.01M
      float c01 = block[y * 2 * stride + x * 2 + 1];
76
1.01M
      float c10 = block[(y * 2 + 1) * stride + x * 2];
77
1.01M
      float c11 = block[(y * 2 + 1) * stride + x * 2 + 1];
78
1.01M
      float r00 = c00 + c01 + c10 + c11;
79
1.01M
      float r01 = c00 + c01 - c10 - c11;
80
1.01M
      float r10 = c00 - c01 + c10 - c11;
81
1.01M
      float r11 = c00 - c01 - c10 + c11;
82
1.01M
      r00 *= 0.25f;
83
1.01M
      r01 *= 0.25f;
84
1.01M
      r10 *= 0.25f;
85
1.01M
      r11 *= 0.25f;
86
1.01M
      temp[y * kBlockDim + x] = r00;
87
1.01M
      temp[y * kBlockDim + num_2x2 + x] = r01;
88
1.01M
      temp[(y + num_2x2) * kBlockDim + x] = r10;
89
1.01M
      temp[(y + num_2x2) * kBlockDim + num_2x2 + x] = r11;
90
1.01M
    }
91
1.01M
  }
92
3.04M
  for (size_t y = 0; y < S; y++) {
93
6.08M
    for (size_t x = 0; x < S; x++) {
94
4.05M
      out[y * kBlockDim + x] = temp[y * kBlockDim + x];
95
4.05M
    }
96
2.02M
  }
97
1.01M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<8ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<4ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::DCT2TopBlock<2ul>(float const*, unsigned long, float*)
98
99
81.2M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
81.2M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
81.2M
      {
102
81.2M
          0.2500000000000000,
103
81.2M
          0.8769029297991420f,
104
81.2M
          0.0000000000000000,
105
81.2M
          0.0000000000000000,
106
81.2M
          0.0000000000000000,
107
81.2M
          -0.4105377591765233f,
108
81.2M
          0.0000000000000000,
109
81.2M
          0.0000000000000000,
110
81.2M
          0.0000000000000000,
111
81.2M
          0.0000000000000000,
112
81.2M
          0.0000000000000000,
113
81.2M
          0.0000000000000000,
114
81.2M
          0.0000000000000000,
115
81.2M
          0.0000000000000000,
116
81.2M
          0.0000000000000000,
117
81.2M
          0.0000000000000000,
118
81.2M
      },
119
81.2M
      {
120
81.2M
          0.2500000000000000,
121
81.2M
          0.2206518106944235f,
122
81.2M
          0.0000000000000000,
123
81.2M
          0.0000000000000000,
124
81.2M
          -0.7071067811865474f,
125
81.2M
          0.6235485373547691f,
126
81.2M
          0.0000000000000000,
127
81.2M
          0.0000000000000000,
128
81.2M
          0.0000000000000000,
129
81.2M
          0.0000000000000000,
130
81.2M
          0.0000000000000000,
131
81.2M
          0.0000000000000000,
132
81.2M
          0.0000000000000000,
133
81.2M
          0.0000000000000000,
134
81.2M
          0.0000000000000000,
135
81.2M
          0.0000000000000000,
136
81.2M
      },
137
81.2M
      {
138
81.2M
          0.2500000000000000,
139
81.2M
          -0.1014005039375376f,
140
81.2M
          0.4067007583026075f,
141
81.2M
          -0.2125574805828875f,
142
81.2M
          0.0000000000000000,
143
81.2M
          -0.0643507165794627f,
144
81.2M
          -0.4517556589999482f,
145
81.2M
          -0.3046847507248690f,
146
81.2M
          0.3017929516615495f,
147
81.2M
          0.4082482904638627f,
148
81.2M
          0.1747866975480809f,
149
81.2M
          -0.2110560104933578f,
150
81.2M
          -0.1426608480880726f,
151
81.2M
          -0.1381354035075859f,
152
81.2M
          -0.1743760259965107f,
153
81.2M
          0.1135498731499434f,
154
81.2M
      },
155
81.2M
      {
156
81.2M
          0.2500000000000000,
157
81.2M
          -0.1014005039375375f,
158
81.2M
          0.4444481661973445f,
159
81.2M
          0.3085497062849767f,
160
81.2M
          0.0000000000000000f,
161
81.2M
          -0.0643507165794627f,
162
81.2M
          0.1585450355184006f,
163
81.2M
          0.5112616136591823f,
164
81.2M
          0.2579236279634118f,
165
81.2M
          0.0000000000000000,
166
81.2M
          0.0812611176717539f,
167
81.2M
          0.1856718091610980f,
168
81.2M
          -0.3416446842253372f,
169
81.2M
          0.3302282550303788f,
170
81.2M
          0.0702790691196284f,
171
81.2M
          -0.0741750459581035f,
172
81.2M
      },
173
81.2M
      {
174
81.2M
          0.2500000000000000,
175
81.2M
          0.2206518106944236f,
176
81.2M
          0.0000000000000000,
177
81.2M
          0.0000000000000000,
178
81.2M
          0.7071067811865476f,
179
81.2M
          0.6235485373547694f,
180
81.2M
          0.0000000000000000,
181
81.2M
          0.0000000000000000,
182
81.2M
          0.0000000000000000,
183
81.2M
          0.0000000000000000,
184
81.2M
          0.0000000000000000,
185
81.2M
          0.0000000000000000,
186
81.2M
          0.0000000000000000,
187
81.2M
          0.0000000000000000,
188
81.2M
          0.0000000000000000,
189
81.2M
          0.0000000000000000,
190
81.2M
      },
191
81.2M
      {
192
81.2M
          0.2500000000000000,
193
81.2M
          -0.1014005039375378f,
194
81.2M
          0.0000000000000000,
195
81.2M
          0.4706702258572536f,
196
81.2M
          0.0000000000000000,
197
81.2M
          -0.0643507165794628f,
198
81.2M
          -0.0403851516082220f,
199
81.2M
          0.0000000000000000,
200
81.2M
          0.1627234014286620f,
201
81.2M
          0.0000000000000000,
202
81.2M
          0.0000000000000000,
203
81.2M
          0.0000000000000000,
204
81.2M
          0.7367497537172237f,
205
81.2M
          0.0875511500058708f,
206
81.2M
          -0.2921026642334881f,
207
81.2M
          0.1940289303259434f,
208
81.2M
      },
209
81.2M
      {
210
81.2M
          0.2500000000000000,
211
81.2M
          -0.1014005039375377f,
212
81.2M
          0.1957439937204294f,
213
81.2M
          -0.1621205195722993f,
214
81.2M
          0.0000000000000000,
215
81.2M
          -0.0643507165794628f,
216
81.2M
          0.0074182263792424f,
217
81.2M
          -0.2904801297289980f,
218
81.2M
          0.0952002265347504f,
219
81.2M
          0.0000000000000000,
220
81.2M
          -0.3675398009862027f,
221
81.2M
          0.4921585901373873f,
222
81.2M
          0.2462710772207515f,
223
81.2M
          -0.0794670660590957f,
224
81.2M
          0.3623817333531167f,
225
81.2M
          -0.4351904965232280f,
226
81.2M
      },
227
81.2M
      {
228
81.2M
          0.2500000000000000,
229
81.2M
          -0.1014005039375376f,
230
81.2M
          0.2929100136981264f,
231
81.2M
          0.0000000000000000,
232
81.2M
          0.0000000000000000,
233
81.2M
          -0.0643507165794627f,
234
81.2M
          0.3935103426921017f,
235
81.2M
          -0.0657870154914280f,
236
81.2M
          0.0000000000000000,
237
81.2M
          -0.4082482904638628f,
238
81.2M
          -0.3078822139579090f,
239
81.2M
          -0.3852501370925192f,
240
81.2M
          -0.0857401903551931f,
241
81.2M
          -0.4613374887461511f,
242
81.2M
          0.0000000000000000,
243
81.2M
          0.2191868483885747f,
244
81.2M
      },
245
81.2M
      {
246
81.2M
          0.2500000000000000,
247
81.2M
          -0.1014005039375376f,
248
81.2M
          -0.4067007583026072f,
249
81.2M
          -0.2125574805828705f,
250
81.2M
          0.0000000000000000,
251
81.2M
          -0.0643507165794627f,
252
81.2M
          -0.4517556589999464f,
253
81.2M
          0.3046847507248840f,
254
81.2M
          0.3017929516615503f,
255
81.2M
          -0.4082482904638635f,
256
81.2M
          -0.1747866975480813f,
257
81.2M
          0.2110560104933581f,
258
81.2M
          -0.1426608480880734f,
259
81.2M
          -0.1381354035075829f,
260
81.2M
          -0.1743760259965108f,
261
81.2M
          0.1135498731499426f,
262
81.2M
      },
263
81.2M
      {
264
81.2M
          0.2500000000000000,
265
81.2M
          -0.1014005039375377f,
266
81.2M
          -0.1957439937204287f,
267
81.2M
          -0.1621205195722833f,
268
81.2M
          0.0000000000000000,
269
81.2M
          -0.0643507165794628f,
270
81.2M
          0.0074182263792444f,
271
81.2M
          0.2904801297290076f,
272
81.2M
          0.0952002265347505f,
273
81.2M
          0.0000000000000000,
274
81.2M
          0.3675398009862011f,
275
81.2M
          -0.4921585901373891f,
276
81.2M
          0.2462710772207514f,
277
81.2M
          -0.0794670660591026f,
278
81.2M
          0.3623817333531165f,
279
81.2M
          -0.4351904965232251f,
280
81.2M
      },
281
81.2M
      {
282
81.2M
          0.2500000000000000,
283
81.2M
          -0.1014005039375375f,
284
81.2M
          0.0000000000000000,
285
81.2M
          -0.4706702258572528f,
286
81.2M
          0.0000000000000000,
287
81.2M
          -0.0643507165794627f,
288
81.2M
          0.1107416575309343f,
289
81.2M
          0.0000000000000000,
290
81.2M
          -0.1627234014286617f,
291
81.2M
          0.0000000000000000,
292
81.2M
          0.0000000000000000,
293
81.2M
          0.0000000000000000,
294
81.2M
          0.1488339922711357f,
295
81.2M
          0.4972464710953509f,
296
81.2M
          0.2921026642334879f,
297
81.2M
          0.5550443808910661f,
298
81.2M
      },
299
81.2M
      {
300
81.2M
          0.2500000000000000,
301
81.2M
          -0.1014005039375377f,
302
81.2M
          0.1137907446044809f,
303
81.2M
          -0.1464291867126764f,
304
81.2M
          0.0000000000000000,
305
81.2M
          -0.0643507165794628f,
306
81.2M
          0.0829816309488205f,
307
81.2M
          -0.2388977352334460f,
308
81.2M
          -0.3531238544981630f,
309
81.2M
          -0.4082482904638630f,
310
81.2M
          0.4826689115059883f,
311
81.2M
          0.1741941265991622f,
312
81.2M
          -0.0476868035022925f,
313
81.2M
          0.1253805944856366f,
314
81.2M
          -0.4326608024727445f,
315
81.2M
          -0.2546827712406646f,
316
81.2M
      },
317
81.2M
      {
318
81.2M
          0.2500000000000000,
319
81.2M
          -0.1014005039375377f,
320
81.2M
          -0.4444481661973438f,
321
81.2M
          0.3085497062849487f,
322
81.2M
          0.0000000000000000,
323
81.2M
          -0.0643507165794628f,
324
81.2M
          0.1585450355183970f,
325
81.2M
          -0.5112616136592012f,
326
81.2M
          0.2579236279634129f,
327
81.2M
          0.0000000000000000,
328
81.2M
          -0.0812611176717504f,
329
81.2M
          -0.1856718091610990f,
330
81.2M
          -0.3416446842253373f,
331
81.2M
          0.3302282550303805f,
332
81.2M
          0.0702790691196282f,
333
81.2M
          -0.0741750459581023f,
334
81.2M
      },
335
81.2M
      {
336
81.2M
          0.2500000000000000,
337
81.2M
          -0.1014005039375376f,
338
81.2M
          -0.2929100136981264f,
339
81.2M
          0.0000000000000000,
340
81.2M
          0.0000000000000000,
341
81.2M
          -0.0643507165794627f,
342
81.2M
          0.3935103426921022f,
343
81.2M
          0.0657870154914254f,
344
81.2M
          0.0000000000000000,
345
81.2M
          0.4082482904638634f,
346
81.2M
          0.3078822139579031f,
347
81.2M
          0.3852501370925211f,
348
81.2M
          -0.0857401903551927f,
349
81.2M
          -0.4613374887461554f,
350
81.2M
          0.0000000000000000,
351
81.2M
          0.2191868483885728f,
352
81.2M
      },
353
81.2M
      {
354
81.2M
          0.2500000000000000,
355
81.2M
          -0.1014005039375376f,
356
81.2M
          -0.1137907446044814f,
357
81.2M
          -0.1464291867126654f,
358
81.2M
          0.0000000000000000,
359
81.2M
          -0.0643507165794627f,
360
81.2M
          0.0829816309488214f,
361
81.2M
          0.2388977352334547f,
362
81.2M
          -0.3531238544981624f,
363
81.2M
          0.4082482904638630f,
364
81.2M
          -0.4826689115059858f,
365
81.2M
          -0.1741941265991621f,
366
81.2M
          -0.0476868035022928f,
367
81.2M
          0.1253805944856431f,
368
81.2M
          -0.4326608024727457f,
369
81.2M
          -0.2546827712406641f,
370
81.2M
      },
371
81.2M
      {
372
81.2M
          0.2500000000000000,
373
81.2M
          -0.1014005039375374f,
374
81.2M
          0.0000000000000000,
375
81.2M
          0.4251149611657548f,
376
81.2M
          0.0000000000000000,
377
81.2M
          -0.0643507165794626f,
378
81.2M
          -0.4517556589999480f,
379
81.2M
          0.0000000000000000,
380
81.2M
          -0.6035859033230976f,
381
81.2M
          0.0000000000000000,
382
81.2M
          0.0000000000000000,
383
81.2M
          0.0000000000000000,
384
81.2M
          -0.1426608480880724f,
385
81.2M
          -0.1381354035075845f,
386
81.2M
          0.3487520519930227f,
387
81.2M
          0.1135498731499429f,
388
81.2M
      },
389
81.2M
  };
390
391
81.2M
  const HWY_CAPPED(float, 16) d;
392
243M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
162M
    auto scalar = Zero(d);
394
2.76G
    for (size_t j = 0; j < 16; j++) {
395
2.59G
      auto px = Set(d, pixels[j]);
396
2.59G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.59G
      scalar = MulAdd(px, basis, scalar);
398
2.59G
    }
399
162M
    Store(scalar, d, coeffs + i);
400
162M
  }
401
81.2M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.56M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
1.56M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
1.56M
      {
102
1.56M
          0.2500000000000000,
103
1.56M
          0.8769029297991420f,
104
1.56M
          0.0000000000000000,
105
1.56M
          0.0000000000000000,
106
1.56M
          0.0000000000000000,
107
1.56M
          -0.4105377591765233f,
108
1.56M
          0.0000000000000000,
109
1.56M
          0.0000000000000000,
110
1.56M
          0.0000000000000000,
111
1.56M
          0.0000000000000000,
112
1.56M
          0.0000000000000000,
113
1.56M
          0.0000000000000000,
114
1.56M
          0.0000000000000000,
115
1.56M
          0.0000000000000000,
116
1.56M
          0.0000000000000000,
117
1.56M
          0.0000000000000000,
118
1.56M
      },
119
1.56M
      {
120
1.56M
          0.2500000000000000,
121
1.56M
          0.2206518106944235f,
122
1.56M
          0.0000000000000000,
123
1.56M
          0.0000000000000000,
124
1.56M
          -0.7071067811865474f,
125
1.56M
          0.6235485373547691f,
126
1.56M
          0.0000000000000000,
127
1.56M
          0.0000000000000000,
128
1.56M
          0.0000000000000000,
129
1.56M
          0.0000000000000000,
130
1.56M
          0.0000000000000000,
131
1.56M
          0.0000000000000000,
132
1.56M
          0.0000000000000000,
133
1.56M
          0.0000000000000000,
134
1.56M
          0.0000000000000000,
135
1.56M
          0.0000000000000000,
136
1.56M
      },
137
1.56M
      {
138
1.56M
          0.2500000000000000,
139
1.56M
          -0.1014005039375376f,
140
1.56M
          0.4067007583026075f,
141
1.56M
          -0.2125574805828875f,
142
1.56M
          0.0000000000000000,
143
1.56M
          -0.0643507165794627f,
144
1.56M
          -0.4517556589999482f,
145
1.56M
          -0.3046847507248690f,
146
1.56M
          0.3017929516615495f,
147
1.56M
          0.4082482904638627f,
148
1.56M
          0.1747866975480809f,
149
1.56M
          -0.2110560104933578f,
150
1.56M
          -0.1426608480880726f,
151
1.56M
          -0.1381354035075859f,
152
1.56M
          -0.1743760259965107f,
153
1.56M
          0.1135498731499434f,
154
1.56M
      },
155
1.56M
      {
156
1.56M
          0.2500000000000000,
157
1.56M
          -0.1014005039375375f,
158
1.56M
          0.4444481661973445f,
159
1.56M
          0.3085497062849767f,
160
1.56M
          0.0000000000000000f,
161
1.56M
          -0.0643507165794627f,
162
1.56M
          0.1585450355184006f,
163
1.56M
          0.5112616136591823f,
164
1.56M
          0.2579236279634118f,
165
1.56M
          0.0000000000000000,
166
1.56M
          0.0812611176717539f,
167
1.56M
          0.1856718091610980f,
168
1.56M
          -0.3416446842253372f,
169
1.56M
          0.3302282550303788f,
170
1.56M
          0.0702790691196284f,
171
1.56M
          -0.0741750459581035f,
172
1.56M
      },
173
1.56M
      {
174
1.56M
          0.2500000000000000,
175
1.56M
          0.2206518106944236f,
176
1.56M
          0.0000000000000000,
177
1.56M
          0.0000000000000000,
178
1.56M
          0.7071067811865476f,
179
1.56M
          0.6235485373547694f,
180
1.56M
          0.0000000000000000,
181
1.56M
          0.0000000000000000,
182
1.56M
          0.0000000000000000,
183
1.56M
          0.0000000000000000,
184
1.56M
          0.0000000000000000,
185
1.56M
          0.0000000000000000,
186
1.56M
          0.0000000000000000,
187
1.56M
          0.0000000000000000,
188
1.56M
          0.0000000000000000,
189
1.56M
          0.0000000000000000,
190
1.56M
      },
191
1.56M
      {
192
1.56M
          0.2500000000000000,
193
1.56M
          -0.1014005039375378f,
194
1.56M
          0.0000000000000000,
195
1.56M
          0.4706702258572536f,
196
1.56M
          0.0000000000000000,
197
1.56M
          -0.0643507165794628f,
198
1.56M
          -0.0403851516082220f,
199
1.56M
          0.0000000000000000,
200
1.56M
          0.1627234014286620f,
201
1.56M
          0.0000000000000000,
202
1.56M
          0.0000000000000000,
203
1.56M
          0.0000000000000000,
204
1.56M
          0.7367497537172237f,
205
1.56M
          0.0875511500058708f,
206
1.56M
          -0.2921026642334881f,
207
1.56M
          0.1940289303259434f,
208
1.56M
      },
209
1.56M
      {
210
1.56M
          0.2500000000000000,
211
1.56M
          -0.1014005039375377f,
212
1.56M
          0.1957439937204294f,
213
1.56M
          -0.1621205195722993f,
214
1.56M
          0.0000000000000000,
215
1.56M
          -0.0643507165794628f,
216
1.56M
          0.0074182263792424f,
217
1.56M
          -0.2904801297289980f,
218
1.56M
          0.0952002265347504f,
219
1.56M
          0.0000000000000000,
220
1.56M
          -0.3675398009862027f,
221
1.56M
          0.4921585901373873f,
222
1.56M
          0.2462710772207515f,
223
1.56M
          -0.0794670660590957f,
224
1.56M
          0.3623817333531167f,
225
1.56M
          -0.4351904965232280f,
226
1.56M
      },
227
1.56M
      {
228
1.56M
          0.2500000000000000,
229
1.56M
          -0.1014005039375376f,
230
1.56M
          0.2929100136981264f,
231
1.56M
          0.0000000000000000,
232
1.56M
          0.0000000000000000,
233
1.56M
          -0.0643507165794627f,
234
1.56M
          0.3935103426921017f,
235
1.56M
          -0.0657870154914280f,
236
1.56M
          0.0000000000000000,
237
1.56M
          -0.4082482904638628f,
238
1.56M
          -0.3078822139579090f,
239
1.56M
          -0.3852501370925192f,
240
1.56M
          -0.0857401903551931f,
241
1.56M
          -0.4613374887461511f,
242
1.56M
          0.0000000000000000,
243
1.56M
          0.2191868483885747f,
244
1.56M
      },
245
1.56M
      {
246
1.56M
          0.2500000000000000,
247
1.56M
          -0.1014005039375376f,
248
1.56M
          -0.4067007583026072f,
249
1.56M
          -0.2125574805828705f,
250
1.56M
          0.0000000000000000,
251
1.56M
          -0.0643507165794627f,
252
1.56M
          -0.4517556589999464f,
253
1.56M
          0.3046847507248840f,
254
1.56M
          0.3017929516615503f,
255
1.56M
          -0.4082482904638635f,
256
1.56M
          -0.1747866975480813f,
257
1.56M
          0.2110560104933581f,
258
1.56M
          -0.1426608480880734f,
259
1.56M
          -0.1381354035075829f,
260
1.56M
          -0.1743760259965108f,
261
1.56M
          0.1135498731499426f,
262
1.56M
      },
263
1.56M
      {
264
1.56M
          0.2500000000000000,
265
1.56M
          -0.1014005039375377f,
266
1.56M
          -0.1957439937204287f,
267
1.56M
          -0.1621205195722833f,
268
1.56M
          0.0000000000000000,
269
1.56M
          -0.0643507165794628f,
270
1.56M
          0.0074182263792444f,
271
1.56M
          0.2904801297290076f,
272
1.56M
          0.0952002265347505f,
273
1.56M
          0.0000000000000000,
274
1.56M
          0.3675398009862011f,
275
1.56M
          -0.4921585901373891f,
276
1.56M
          0.2462710772207514f,
277
1.56M
          -0.0794670660591026f,
278
1.56M
          0.3623817333531165f,
279
1.56M
          -0.4351904965232251f,
280
1.56M
      },
281
1.56M
      {
282
1.56M
          0.2500000000000000,
283
1.56M
          -0.1014005039375375f,
284
1.56M
          0.0000000000000000,
285
1.56M
          -0.4706702258572528f,
286
1.56M
          0.0000000000000000,
287
1.56M
          -0.0643507165794627f,
288
1.56M
          0.1107416575309343f,
289
1.56M
          0.0000000000000000,
290
1.56M
          -0.1627234014286617f,
291
1.56M
          0.0000000000000000,
292
1.56M
          0.0000000000000000,
293
1.56M
          0.0000000000000000,
294
1.56M
          0.1488339922711357f,
295
1.56M
          0.4972464710953509f,
296
1.56M
          0.2921026642334879f,
297
1.56M
          0.5550443808910661f,
298
1.56M
      },
299
1.56M
      {
300
1.56M
          0.2500000000000000,
301
1.56M
          -0.1014005039375377f,
302
1.56M
          0.1137907446044809f,
303
1.56M
          -0.1464291867126764f,
304
1.56M
          0.0000000000000000,
305
1.56M
          -0.0643507165794628f,
306
1.56M
          0.0829816309488205f,
307
1.56M
          -0.2388977352334460f,
308
1.56M
          -0.3531238544981630f,
309
1.56M
          -0.4082482904638630f,
310
1.56M
          0.4826689115059883f,
311
1.56M
          0.1741941265991622f,
312
1.56M
          -0.0476868035022925f,
313
1.56M
          0.1253805944856366f,
314
1.56M
          -0.4326608024727445f,
315
1.56M
          -0.2546827712406646f,
316
1.56M
      },
317
1.56M
      {
318
1.56M
          0.2500000000000000,
319
1.56M
          -0.1014005039375377f,
320
1.56M
          -0.4444481661973438f,
321
1.56M
          0.3085497062849487f,
322
1.56M
          0.0000000000000000,
323
1.56M
          -0.0643507165794628f,
324
1.56M
          0.1585450355183970f,
325
1.56M
          -0.5112616136592012f,
326
1.56M
          0.2579236279634129f,
327
1.56M
          0.0000000000000000,
328
1.56M
          -0.0812611176717504f,
329
1.56M
          -0.1856718091610990f,
330
1.56M
          -0.3416446842253373f,
331
1.56M
          0.3302282550303805f,
332
1.56M
          0.0702790691196282f,
333
1.56M
          -0.0741750459581023f,
334
1.56M
      },
335
1.56M
      {
336
1.56M
          0.2500000000000000,
337
1.56M
          -0.1014005039375376f,
338
1.56M
          -0.2929100136981264f,
339
1.56M
          0.0000000000000000,
340
1.56M
          0.0000000000000000,
341
1.56M
          -0.0643507165794627f,
342
1.56M
          0.3935103426921022f,
343
1.56M
          0.0657870154914254f,
344
1.56M
          0.0000000000000000,
345
1.56M
          0.4082482904638634f,
346
1.56M
          0.3078822139579031f,
347
1.56M
          0.3852501370925211f,
348
1.56M
          -0.0857401903551927f,
349
1.56M
          -0.4613374887461554f,
350
1.56M
          0.0000000000000000,
351
1.56M
          0.2191868483885728f,
352
1.56M
      },
353
1.56M
      {
354
1.56M
          0.2500000000000000,
355
1.56M
          -0.1014005039375376f,
356
1.56M
          -0.1137907446044814f,
357
1.56M
          -0.1464291867126654f,
358
1.56M
          0.0000000000000000,
359
1.56M
          -0.0643507165794627f,
360
1.56M
          0.0829816309488214f,
361
1.56M
          0.2388977352334547f,
362
1.56M
          -0.3531238544981624f,
363
1.56M
          0.4082482904638630f,
364
1.56M
          -0.4826689115059858f,
365
1.56M
          -0.1741941265991621f,
366
1.56M
          -0.0476868035022928f,
367
1.56M
          0.1253805944856431f,
368
1.56M
          -0.4326608024727457f,
369
1.56M
          -0.2546827712406641f,
370
1.56M
      },
371
1.56M
      {
372
1.56M
          0.2500000000000000,
373
1.56M
          -0.1014005039375374f,
374
1.56M
          0.0000000000000000,
375
1.56M
          0.4251149611657548f,
376
1.56M
          0.0000000000000000,
377
1.56M
          -0.0643507165794626f,
378
1.56M
          -0.4517556589999480f,
379
1.56M
          0.0000000000000000,
380
1.56M
          -0.6035859033230976f,
381
1.56M
          0.0000000000000000,
382
1.56M
          0.0000000000000000,
383
1.56M
          0.0000000000000000,
384
1.56M
          -0.1426608480880724f,
385
1.56M
          -0.1381354035075845f,
386
1.56M
          0.3487520519930227f,
387
1.56M
          0.1135498731499429f,
388
1.56M
      },
389
1.56M
  };
390
391
1.56M
  const HWY_CAPPED(float, 16) d;
392
4.68M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
3.12M
    auto scalar = Zero(d);
394
53.1M
    for (size_t j = 0; j < 16; j++) {
395
49.9M
      auto px = Set(d, pixels[j]);
396
49.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
49.9M
      scalar = MulAdd(px, basis, scalar);
398
49.9M
    }
399
3.12M
    Store(scalar, d, coeffs + i);
400
3.12M
  }
401
1.56M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
78.0M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {
100
78.0M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {
101
78.0M
      {
102
78.0M
          0.2500000000000000,
103
78.0M
          0.8769029297991420f,
104
78.0M
          0.0000000000000000,
105
78.0M
          0.0000000000000000,
106
78.0M
          0.0000000000000000,
107
78.0M
          -0.4105377591765233f,
108
78.0M
          0.0000000000000000,
109
78.0M
          0.0000000000000000,
110
78.0M
          0.0000000000000000,
111
78.0M
          0.0000000000000000,
112
78.0M
          0.0000000000000000,
113
78.0M
          0.0000000000000000,
114
78.0M
          0.0000000000000000,
115
78.0M
          0.0000000000000000,
116
78.0M
          0.0000000000000000,
117
78.0M
          0.0000000000000000,
118
78.0M
      },
119
78.0M
      {
120
78.0M
          0.2500000000000000,
121
78.0M
          0.2206518106944235f,
122
78.0M
          0.0000000000000000,
123
78.0M
          0.0000000000000000,
124
78.0M
          -0.7071067811865474f,
125
78.0M
          0.6235485373547691f,
126
78.0M
          0.0000000000000000,
127
78.0M
          0.0000000000000000,
128
78.0M
          0.0000000000000000,
129
78.0M
          0.0000000000000000,
130
78.0M
          0.0000000000000000,
131
78.0M
          0.0000000000000000,
132
78.0M
          0.0000000000000000,
133
78.0M
          0.0000000000000000,
134
78.0M
          0.0000000000000000,
135
78.0M
          0.0000000000000000,
136
78.0M
      },
137
78.0M
      {
138
78.0M
          0.2500000000000000,
139
78.0M
          -0.1014005039375376f,
140
78.0M
          0.4067007583026075f,
141
78.0M
          -0.2125574805828875f,
142
78.0M
          0.0000000000000000,
143
78.0M
          -0.0643507165794627f,
144
78.0M
          -0.4517556589999482f,
145
78.0M
          -0.3046847507248690f,
146
78.0M
          0.3017929516615495f,
147
78.0M
          0.4082482904638627f,
148
78.0M
          0.1747866975480809f,
149
78.0M
          -0.2110560104933578f,
150
78.0M
          -0.1426608480880726f,
151
78.0M
          -0.1381354035075859f,
152
78.0M
          -0.1743760259965107f,
153
78.0M
          0.1135498731499434f,
154
78.0M
      },
155
78.0M
      {
156
78.0M
          0.2500000000000000,
157
78.0M
          -0.1014005039375375f,
158
78.0M
          0.4444481661973445f,
159
78.0M
          0.3085497062849767f,
160
78.0M
          0.0000000000000000f,
161
78.0M
          -0.0643507165794627f,
162
78.0M
          0.1585450355184006f,
163
78.0M
          0.5112616136591823f,
164
78.0M
          0.2579236279634118f,
165
78.0M
          0.0000000000000000,
166
78.0M
          0.0812611176717539f,
167
78.0M
          0.1856718091610980f,
168
78.0M
          -0.3416446842253372f,
169
78.0M
          0.3302282550303788f,
170
78.0M
          0.0702790691196284f,
171
78.0M
          -0.0741750459581035f,
172
78.0M
      },
173
78.0M
      {
174
78.0M
          0.2500000000000000,
175
78.0M
          0.2206518106944236f,
176
78.0M
          0.0000000000000000,
177
78.0M
          0.0000000000000000,
178
78.0M
          0.7071067811865476f,
179
78.0M
          0.6235485373547694f,
180
78.0M
          0.0000000000000000,
181
78.0M
          0.0000000000000000,
182
78.0M
          0.0000000000000000,
183
78.0M
          0.0000000000000000,
184
78.0M
          0.0000000000000000,
185
78.0M
          0.0000000000000000,
186
78.0M
          0.0000000000000000,
187
78.0M
          0.0000000000000000,
188
78.0M
          0.0000000000000000,
189
78.0M
          0.0000000000000000,
190
78.0M
      },
191
78.0M
      {
192
78.0M
          0.2500000000000000,
193
78.0M
          -0.1014005039375378f,
194
78.0M
          0.0000000000000000,
195
78.0M
          0.4706702258572536f,
196
78.0M
          0.0000000000000000,
197
78.0M
          -0.0643507165794628f,
198
78.0M
          -0.0403851516082220f,
199
78.0M
          0.0000000000000000,
200
78.0M
          0.1627234014286620f,
201
78.0M
          0.0000000000000000,
202
78.0M
          0.0000000000000000,
203
78.0M
          0.0000000000000000,
204
78.0M
          0.7367497537172237f,
205
78.0M
          0.0875511500058708f,
206
78.0M
          -0.2921026642334881f,
207
78.0M
          0.1940289303259434f,
208
78.0M
      },
209
78.0M
      {
210
78.0M
          0.2500000000000000,
211
78.0M
          -0.1014005039375377f,
212
78.0M
          0.1957439937204294f,
213
78.0M
          -0.1621205195722993f,
214
78.0M
          0.0000000000000000,
215
78.0M
          -0.0643507165794628f,
216
78.0M
          0.0074182263792424f,
217
78.0M
          -0.2904801297289980f,
218
78.0M
          0.0952002265347504f,
219
78.0M
          0.0000000000000000,
220
78.0M
          -0.3675398009862027f,
221
78.0M
          0.4921585901373873f,
222
78.0M
          0.2462710772207515f,
223
78.0M
          -0.0794670660590957f,
224
78.0M
          0.3623817333531167f,
225
78.0M
          -0.4351904965232280f,
226
78.0M
      },
227
78.0M
      {
228
78.0M
          0.2500000000000000,
229
78.0M
          -0.1014005039375376f,
230
78.0M
          0.2929100136981264f,
231
78.0M
          0.0000000000000000,
232
78.0M
          0.0000000000000000,
233
78.0M
          -0.0643507165794627f,
234
78.0M
          0.3935103426921017f,
235
78.0M
          -0.0657870154914280f,
236
78.0M
          0.0000000000000000,
237
78.0M
          -0.4082482904638628f,
238
78.0M
          -0.3078822139579090f,
239
78.0M
          -0.3852501370925192f,
240
78.0M
          -0.0857401903551931f,
241
78.0M
          -0.4613374887461511f,
242
78.0M
          0.0000000000000000,
243
78.0M
          0.2191868483885747f,
244
78.0M
      },
245
78.0M
      {
246
78.0M
          0.2500000000000000,
247
78.0M
          -0.1014005039375376f,
248
78.0M
          -0.4067007583026072f,
249
78.0M
          -0.2125574805828705f,
250
78.0M
          0.0000000000000000,
251
78.0M
          -0.0643507165794627f,
252
78.0M
          -0.4517556589999464f,
253
78.0M
          0.3046847507248840f,
254
78.0M
          0.3017929516615503f,
255
78.0M
          -0.4082482904638635f,
256
78.0M
          -0.1747866975480813f,
257
78.0M
          0.2110560104933581f,
258
78.0M
          -0.1426608480880734f,
259
78.0M
          -0.1381354035075829f,
260
78.0M
          -0.1743760259965108f,
261
78.0M
          0.1135498731499426f,
262
78.0M
      },
263
78.0M
      {
264
78.0M
          0.2500000000000000,
265
78.0M
          -0.1014005039375377f,
266
78.0M
          -0.1957439937204287f,
267
78.0M
          -0.1621205195722833f,
268
78.0M
          0.0000000000000000,
269
78.0M
          -0.0643507165794628f,
270
78.0M
          0.0074182263792444f,
271
78.0M
          0.2904801297290076f,
272
78.0M
          0.0952002265347505f,
273
78.0M
          0.0000000000000000,
274
78.0M
          0.3675398009862011f,
275
78.0M
          -0.4921585901373891f,
276
78.0M
          0.2462710772207514f,
277
78.0M
          -0.0794670660591026f,
278
78.0M
          0.3623817333531165f,
279
78.0M
          -0.4351904965232251f,
280
78.0M
      },
281
78.0M
      {
282
78.0M
          0.2500000000000000,
283
78.0M
          -0.1014005039375375f,
284
78.0M
          0.0000000000000000,
285
78.0M
          -0.4706702258572528f,
286
78.0M
          0.0000000000000000,
287
78.0M
          -0.0643507165794627f,
288
78.0M
          0.1107416575309343f,
289
78.0M
          0.0000000000000000,
290
78.0M
          -0.1627234014286617f,
291
78.0M
          0.0000000000000000,
292
78.0M
          0.0000000000000000,
293
78.0M
          0.0000000000000000,
294
78.0M
          0.1488339922711357f,
295
78.0M
          0.4972464710953509f,
296
78.0M
          0.2921026642334879f,
297
78.0M
          0.5550443808910661f,
298
78.0M
      },
299
78.0M
      {
300
78.0M
          0.2500000000000000,
301
78.0M
          -0.1014005039375377f,
302
78.0M
          0.1137907446044809f,
303
78.0M
          -0.1464291867126764f,
304
78.0M
          0.0000000000000000,
305
78.0M
          -0.0643507165794628f,
306
78.0M
          0.0829816309488205f,
307
78.0M
          -0.2388977352334460f,
308
78.0M
          -0.3531238544981630f,
309
78.0M
          -0.4082482904638630f,
310
78.0M
          0.4826689115059883f,
311
78.0M
          0.1741941265991622f,
312
78.0M
          -0.0476868035022925f,
313
78.0M
          0.1253805944856366f,
314
78.0M
          -0.4326608024727445f,
315
78.0M
          -0.2546827712406646f,
316
78.0M
      },
317
78.0M
      {
318
78.0M
          0.2500000000000000,
319
78.0M
          -0.1014005039375377f,
320
78.0M
          -0.4444481661973438f,
321
78.0M
          0.3085497062849487f,
322
78.0M
          0.0000000000000000,
323
78.0M
          -0.0643507165794628f,
324
78.0M
          0.1585450355183970f,
325
78.0M
          -0.5112616136592012f,
326
78.0M
          0.2579236279634129f,
327
78.0M
          0.0000000000000000,
328
78.0M
          -0.0812611176717504f,
329
78.0M
          -0.1856718091610990f,
330
78.0M
          -0.3416446842253373f,
331
78.0M
          0.3302282550303805f,
332
78.0M
          0.0702790691196282f,
333
78.0M
          -0.0741750459581023f,
334
78.0M
      },
335
78.0M
      {
336
78.0M
          0.2500000000000000,
337
78.0M
          -0.1014005039375376f,
338
78.0M
          -0.2929100136981264f,
339
78.0M
          0.0000000000000000,
340
78.0M
          0.0000000000000000,
341
78.0M
          -0.0643507165794627f,
342
78.0M
          0.3935103426921022f,
343
78.0M
          0.0657870154914254f,
344
78.0M
          0.0000000000000000,
345
78.0M
          0.4082482904638634f,
346
78.0M
          0.3078822139579031f,
347
78.0M
          0.3852501370925211f,
348
78.0M
          -0.0857401903551927f,
349
78.0M
          -0.4613374887461554f,
350
78.0M
          0.0000000000000000,
351
78.0M
          0.2191868483885728f,
352
78.0M
      },
353
78.0M
      {
354
78.0M
          0.2500000000000000,
355
78.0M
          -0.1014005039375376f,
356
78.0M
          -0.1137907446044814f,
357
78.0M
          -0.1464291867126654f,
358
78.0M
          0.0000000000000000,
359
78.0M
          -0.0643507165794627f,
360
78.0M
          0.0829816309488214f,
361
78.0M
          0.2388977352334547f,
362
78.0M
          -0.3531238544981624f,
363
78.0M
          0.4082482904638630f,
364
78.0M
          -0.4826689115059858f,
365
78.0M
          -0.1741941265991621f,
366
78.0M
          -0.0476868035022928f,
367
78.0M
          0.1253805944856431f,
368
78.0M
          -0.4326608024727457f,
369
78.0M
          -0.2546827712406641f,
370
78.0M
      },
371
78.0M
      {
372
78.0M
          0.2500000000000000,
373
78.0M
          -0.1014005039375374f,
374
78.0M
          0.0000000000000000,
375
78.0M
          0.4251149611657548f,
376
78.0M
          0.0000000000000000,
377
78.0M
          -0.0643507165794626f,
378
78.0M
          -0.4517556589999480f,
379
78.0M
          0.0000000000000000,
380
78.0M
          -0.6035859033230976f,
381
78.0M
          0.0000000000000000,
382
78.0M
          0.0000000000000000,
383
78.0M
          0.0000000000000000,
384
78.0M
          -0.1426608480880724f,
385
78.0M
          -0.1381354035075845f,
386
78.0M
          0.3487520519930227f,
387
78.0M
          0.1135498731499429f,
388
78.0M
      },
389
78.0M
  };
390
391
78.0M
  const HWY_CAPPED(float, 16) d;
392
234M
  for (size_t i = 0; i < 16; i += Lanes(d)) {
393
156M
    auto scalar = Zero(d);
394
2.65G
    for (size_t j = 0; j < 16; j++) {
395
2.49G
      auto px = Set(d, pixels[j]);
396
2.49G
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);
397
2.49G
      scalar = MulAdd(px, basis, scalar);
398
2.49G
    }
399
156M
    Store(scalar, d, coeffs + i);
400
156M
  }
401
78.0M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Line
Count
Source
99
1.56M
void AFVDCT4x4(const float* JXL_RESTRICT pixels, float* JXL_RESTRICT coeffs) {  // coeffs[i] = sum over j of pixels[j] * k4x4AFVBasisTranspose[j][i], for i, j in [0, 16)
100
1.56M
  HWY_ALIGN static constexpr float k4x4AFVBasisTranspose[16][16] = {  // row j: weight of pixel j in each of the 16 output coefficients
101
1.56M
      {
102
1.56M
          0.2500000000000000,
103
1.56M
          0.8769029297991420f,
104
1.56M
          0.0000000000000000,
105
1.56M
          0.0000000000000000,
106
1.56M
          0.0000000000000000,
107
1.56M
          -0.4105377591765233f,
108
1.56M
          0.0000000000000000,
109
1.56M
          0.0000000000000000,
110
1.56M
          0.0000000000000000,
111
1.56M
          0.0000000000000000,
112
1.56M
          0.0000000000000000,
113
1.56M
          0.0000000000000000,
114
1.56M
          0.0000000000000000,
115
1.56M
          0.0000000000000000,
116
1.56M
          0.0000000000000000,
117
1.56M
          0.0000000000000000,
118
1.56M
      },
119
1.56M
      {
120
1.56M
          0.2500000000000000,
121
1.56M
          0.2206518106944235f,
122
1.56M
          0.0000000000000000,
123
1.56M
          0.0000000000000000,
124
1.56M
          -0.7071067811865474f,
125
1.56M
          0.6235485373547691f,
126
1.56M
          0.0000000000000000,
127
1.56M
          0.0000000000000000,
128
1.56M
          0.0000000000000000,
129
1.56M
          0.0000000000000000,
130
1.56M
          0.0000000000000000,
131
1.56M
          0.0000000000000000,
132
1.56M
          0.0000000000000000,
133
1.56M
          0.0000000000000000,
134
1.56M
          0.0000000000000000,
135
1.56M
          0.0000000000000000,
136
1.56M
      },
137
1.56M
      {
138
1.56M
          0.2500000000000000,
139
1.56M
          -0.1014005039375376f,
140
1.56M
          0.4067007583026075f,
141
1.56M
          -0.2125574805828875f,
142
1.56M
          0.0000000000000000,
143
1.56M
          -0.0643507165794627f,
144
1.56M
          -0.4517556589999482f,
145
1.56M
          -0.3046847507248690f,
146
1.56M
          0.3017929516615495f,
147
1.56M
          0.4082482904638627f,
148
1.56M
          0.1747866975480809f,
149
1.56M
          -0.2110560104933578f,
150
1.56M
          -0.1426608480880726f,
151
1.56M
          -0.1381354035075859f,
152
1.56M
          -0.1743760259965107f,
153
1.56M
          0.1135498731499434f,
154
1.56M
      },
155
1.56M
      {
156
1.56M
          0.2500000000000000,
157
1.56M
          -0.1014005039375375f,
158
1.56M
          0.4444481661973445f,
159
1.56M
          0.3085497062849767f,
160
1.56M
          0.0000000000000000f,
161
1.56M
          -0.0643507165794627f,
162
1.56M
          0.1585450355184006f,
163
1.56M
          0.5112616136591823f,
164
1.56M
          0.2579236279634118f,
165
1.56M
          0.0000000000000000,
166
1.56M
          0.0812611176717539f,
167
1.56M
          0.1856718091610980f,
168
1.56M
          -0.3416446842253372f,
169
1.56M
          0.3302282550303788f,
170
1.56M
          0.0702790691196284f,
171
1.56M
          -0.0741750459581035f,
172
1.56M
      },
173
1.56M
      {
174
1.56M
          0.2500000000000000,
175
1.56M
          0.2206518106944236f,
176
1.56M
          0.0000000000000000,
177
1.56M
          0.0000000000000000,
178
1.56M
          0.7071067811865476f,
179
1.56M
          0.6235485373547694f,
180
1.56M
          0.0000000000000000,
181
1.56M
          0.0000000000000000,
182
1.56M
          0.0000000000000000,
183
1.56M
          0.0000000000000000,
184
1.56M
          0.0000000000000000,
185
1.56M
          0.0000000000000000,
186
1.56M
          0.0000000000000000,
187
1.56M
          0.0000000000000000,
188
1.56M
          0.0000000000000000,
189
1.56M
          0.0000000000000000,
190
1.56M
      },
191
1.56M
      {
192
1.56M
          0.2500000000000000,
193
1.56M
          -0.1014005039375378f,
194
1.56M
          0.0000000000000000,
195
1.56M
          0.4706702258572536f,
196
1.56M
          0.0000000000000000,
197
1.56M
          -0.0643507165794628f,
198
1.56M
          -0.0403851516082220f,
199
1.56M
          0.0000000000000000,
200
1.56M
          0.1627234014286620f,
201
1.56M
          0.0000000000000000,
202
1.56M
          0.0000000000000000,
203
1.56M
          0.0000000000000000,
204
1.56M
          0.7367497537172237f,
205
1.56M
          0.0875511500058708f,
206
1.56M
          -0.2921026642334881f,
207
1.56M
          0.1940289303259434f,
208
1.56M
      },
209
1.56M
      {
210
1.56M
          0.2500000000000000,
211
1.56M
          -0.1014005039375377f,
212
1.56M
          0.1957439937204294f,
213
1.56M
          -0.1621205195722993f,
214
1.56M
          0.0000000000000000,
215
1.56M
          -0.0643507165794628f,
216
1.56M
          0.0074182263792424f,
217
1.56M
          -0.2904801297289980f,
218
1.56M
          0.0952002265347504f,
219
1.56M
          0.0000000000000000,
220
1.56M
          -0.3675398009862027f,
221
1.56M
          0.4921585901373873f,
222
1.56M
          0.2462710772207515f,
223
1.56M
          -0.0794670660590957f,
224
1.56M
          0.3623817333531167f,
225
1.56M
          -0.4351904965232280f,
226
1.56M
      },
227
1.56M
      {
228
1.56M
          0.2500000000000000,
229
1.56M
          -0.1014005039375376f,
230
1.56M
          0.2929100136981264f,
231
1.56M
          0.0000000000000000,
232
1.56M
          0.0000000000000000,
233
1.56M
          -0.0643507165794627f,
234
1.56M
          0.3935103426921017f,
235
1.56M
          -0.0657870154914280f,
236
1.56M
          0.0000000000000000,
237
1.56M
          -0.4082482904638628f,
238
1.56M
          -0.3078822139579090f,
239
1.56M
          -0.3852501370925192f,
240
1.56M
          -0.0857401903551931f,
241
1.56M
          -0.4613374887461511f,
242
1.56M
          0.0000000000000000,
243
1.56M
          0.2191868483885747f,
244
1.56M
      },
245
1.56M
      {
246
1.56M
          0.2500000000000000,
247
1.56M
          -0.1014005039375376f,
248
1.56M
          -0.4067007583026072f,
249
1.56M
          -0.2125574805828705f,
250
1.56M
          0.0000000000000000,
251
1.56M
          -0.0643507165794627f,
252
1.56M
          -0.4517556589999464f,
253
1.56M
          0.3046847507248840f,
254
1.56M
          0.3017929516615503f,
255
1.56M
          -0.4082482904638635f,
256
1.56M
          -0.1747866975480813f,
257
1.56M
          0.2110560104933581f,
258
1.56M
          -0.1426608480880734f,
259
1.56M
          -0.1381354035075829f,
260
1.56M
          -0.1743760259965108f,
261
1.56M
          0.1135498731499426f,
262
1.56M
      },
263
1.56M
      {
264
1.56M
          0.2500000000000000,
265
1.56M
          -0.1014005039375377f,
266
1.56M
          -0.1957439937204287f,
267
1.56M
          -0.1621205195722833f,
268
1.56M
          0.0000000000000000,
269
1.56M
          -0.0643507165794628f,
270
1.56M
          0.0074182263792444f,
271
1.56M
          0.2904801297290076f,
272
1.56M
          0.0952002265347505f,
273
1.56M
          0.0000000000000000,
274
1.56M
          0.3675398009862011f,
275
1.56M
          -0.4921585901373891f,
276
1.56M
          0.2462710772207514f,
277
1.56M
          -0.0794670660591026f,
278
1.56M
          0.3623817333531165f,
279
1.56M
          -0.4351904965232251f,
280
1.56M
      },
281
1.56M
      {
282
1.56M
          0.2500000000000000,
283
1.56M
          -0.1014005039375375f,
284
1.56M
          0.0000000000000000,
285
1.56M
          -0.4706702258572528f,
286
1.56M
          0.0000000000000000,
287
1.56M
          -0.0643507165794627f,
288
1.56M
          0.1107416575309343f,
289
1.56M
          0.0000000000000000,
290
1.56M
          -0.1627234014286617f,
291
1.56M
          0.0000000000000000,
292
1.56M
          0.0000000000000000,
293
1.56M
          0.0000000000000000,
294
1.56M
          0.1488339922711357f,
295
1.56M
          0.4972464710953509f,
296
1.56M
          0.2921026642334879f,
297
1.56M
          0.5550443808910661f,
298
1.56M
      },
299
1.56M
      {
300
1.56M
          0.2500000000000000,
301
1.56M
          -0.1014005039375377f,
302
1.56M
          0.1137907446044809f,
303
1.56M
          -0.1464291867126764f,
304
1.56M
          0.0000000000000000,
305
1.56M
          -0.0643507165794628f,
306
1.56M
          0.0829816309488205f,
307
1.56M
          -0.2388977352334460f,
308
1.56M
          -0.3531238544981630f,
309
1.56M
          -0.4082482904638630f,
310
1.56M
          0.4826689115059883f,
311
1.56M
          0.1741941265991622f,
312
1.56M
          -0.0476868035022925f,
313
1.56M
          0.1253805944856366f,
314
1.56M
          -0.4326608024727445f,
315
1.56M
          -0.2546827712406646f,
316
1.56M
      },
317
1.56M
      {
318
1.56M
          0.2500000000000000,
319
1.56M
          -0.1014005039375377f,
320
1.56M
          -0.4444481661973438f,
321
1.56M
          0.3085497062849487f,
322
1.56M
          0.0000000000000000,
323
1.56M
          -0.0643507165794628f,
324
1.56M
          0.1585450355183970f,
325
1.56M
          -0.5112616136592012f,
326
1.56M
          0.2579236279634129f,
327
1.56M
          0.0000000000000000,
328
1.56M
          -0.0812611176717504f,
329
1.56M
          -0.1856718091610990f,
330
1.56M
          -0.3416446842253373f,
331
1.56M
          0.3302282550303805f,
332
1.56M
          0.0702790691196282f,
333
1.56M
          -0.0741750459581023f,
334
1.56M
      },
335
1.56M
      {
336
1.56M
          0.2500000000000000,
337
1.56M
          -0.1014005039375376f,
338
1.56M
          -0.2929100136981264f,
339
1.56M
          0.0000000000000000,
340
1.56M
          0.0000000000000000,
341
1.56M
          -0.0643507165794627f,
342
1.56M
          0.3935103426921022f,
343
1.56M
          0.0657870154914254f,
344
1.56M
          0.0000000000000000,
345
1.56M
          0.4082482904638634f,
346
1.56M
          0.3078822139579031f,
347
1.56M
          0.3852501370925211f,
348
1.56M
          -0.0857401903551927f,
349
1.56M
          -0.4613374887461554f,
350
1.56M
          0.0000000000000000,
351
1.56M
          0.2191868483885728f,
352
1.56M
      },
353
1.56M
      {
354
1.56M
          0.2500000000000000,
355
1.56M
          -0.1014005039375376f,
356
1.56M
          -0.1137907446044814f,
357
1.56M
          -0.1464291867126654f,
358
1.56M
          0.0000000000000000,
359
1.56M
          -0.0643507165794627f,
360
1.56M
          0.0829816309488214f,
361
1.56M
          0.2388977352334547f,
362
1.56M
          -0.3531238544981624f,
363
1.56M
          0.4082482904638630f,
364
1.56M
          -0.4826689115059858f,
365
1.56M
          -0.1741941265991621f,
366
1.56M
          -0.0476868035022928f,
367
1.56M
          0.1253805944856431f,
368
1.56M
          -0.4326608024727457f,
369
1.56M
          -0.2546827712406641f,
370
1.56M
      },
371
1.56M
      {
372
1.56M
          0.2500000000000000,
373
1.56M
          -0.1014005039375374f,
374
1.56M
          0.0000000000000000,
375
1.56M
          0.4251149611657548f,
376
1.56M
          0.0000000000000000,
377
1.56M
          -0.0643507165794626f,
378
1.56M
          -0.4517556589999480f,
379
1.56M
          0.0000000000000000,
380
1.56M
          -0.6035859033230976f,
381
1.56M
          0.0000000000000000,
382
1.56M
          0.0000000000000000,
383
1.56M
          0.0000000000000000,
384
1.56M
          -0.1426608480880724f,
385
1.56M
          -0.1381354035075845f,
386
1.56M
          0.3487520519930227f,
387
1.56M
          0.1135498731499429f,
388
1.56M
      },
389
1.56M
  };
390
391
1.56M
  const HWY_CAPPED(float, 16) d;  // Highway vector tag; presumably capped at 16 float lanes - confirm against the Highway docs
392
4.68M
  for (size_t i = 0; i < 16; i += Lanes(d)) {  // each pass produces Lanes(d) consecutive output coefficients
393
3.12M
    auto scalar = Zero(d);  // accumulator; a vector of Lanes(d) partial sums despite its name
394
53.1M
    for (size_t j = 0; j < 16; j++) {  // accumulate the contribution of every input pixel
395
49.9M
      auto px = Set(d, pixels[j]);  // pixel j replicated across all lanes
396
49.9M
      auto basis = Load(d, k4x4AFVBasisTranspose[j] + i);  // weights of pixel j for outputs i .. i + Lanes(d) - 1
397
49.9M
      scalar = MulAdd(px, basis, scalar);  // scalar += px * basis
398
49.9M
    }
399
3.12M
    Store(scalar, d, coeffs + i);  // write the finished coefficients for this pass
400
3.12M
  }
401
1.56M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::AFVDCT4x4(float const*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::AFVDCT4x4(float const*, float*)
402
403
// Coefficient layout of the 8x8 output (row stride 8); positions are written as (column, row):
404
//  - (even, even) positions hold AFV coefficients
405
//  - (odd, even) positions hold DCT4x4 coefficients
406
//  - (any, odd) positions hold DCT4x8 coefficients
407
template <size_t afv_kind>
408
void AFVTransformFromPixels(const float* JXL_RESTRICT pixels,
409
                            size_t pixels_stride,
410
81.2M
                            float* JXL_RESTRICT coefficients) {
411
81.2M
  HWY_ALIGN float scratch_space[4 * 8 * 5];  // working memory handed to both ComputeScaledDCT calls
412
81.2M
  size_t afv_x = afv_kind & 1;  // low bit: the AFV quadrant starts at column 4 * afv_x
413
81.2M
  size_t afv_y = afv_kind / 2;  // remaining bits: the AFV quadrant starts at row 4 * afv_y
414
81.2M
  HWY_ALIGN float block[4 * 8] = {};  // zero-initialized; reused as input/output buffer for each sub-transform
415
406M
  for (size_t iy = 0; iy < 4; iy++) {
416
1.62G
    for (size_t ix = 0; ix < 4; ix++) {
417
1.29G
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =  // destination is mirrored in x and/or y when afv_x / afv_y is 1
418
1.29G
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];  // source: the selected 4x4 quadrant of the input
419
1.29G
    }
420
324M
  }
421
  // AFV coefficients in (even, even) positions.
422
81.2M
  HWY_ALIGN float coeff[4 * 4];
423
81.2M
  AFVDCT4x4(block, coeff);  // 16 pixels in, 16 coefficients out
424
406M
  for (size_t iy = 0; iy < 4; iy++) {
425
1.62G
    for (size_t ix = 0; ix < 4; ix++) {
426
1.29G
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];  // row 2 * iy, column 2 * ix
427
1.29G
    }
428
324M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
81.2M
  ComputeScaledDCT<4, 4>()(
431
81.2M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),  // same rows as the AFV quadrant, the other column half
432
81.2M
              pixels_stride),
433
81.2M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
406M
  for (size_t iy = 0; iy < 4; iy++) {
436
2.92G
    for (size_t ix = 0; ix < 8; ix++) {  // NOTE(review): for ix >= 4 this reads block with stride 4 and writes into odd rows, all of which the 4x8 copy below overwrites - confirm ix < 4 would suffice
437
2.59G
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];  // for ix < 4: row 2 * iy, column 2 * ix + 1
438
2.59G
    }
439
324M
  }
440
  // 4x8 DCT of the other half of the block.
441
81.2M
  ComputeScaledDCT<4, 8>()(
442
81.2M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),  // starts at row 0 when afv_y == 1, otherwise at row 4
443
81.2M
      block, scratch_space);
444
406M
  for (size_t iy = 0; iy < 4; iy++) {
445
2.92G
    for (size_t ix = 0; ix < 8; ix++) {
446
2.59G
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];  // fills every column of the odd rows 1, 3, 5, 7
447
2.59G
    }
448
324M
  }
449
81.2M
  float block00 = coefficients[0] * 0.25f;  // first AFV coefficient, scaled by 1/4
450
81.2M
  float block01 = coefficients[1];  // first coefficient of the 4x4 DCT
451
81.2M
  float block10 = coefficients[8];  // first coefficient of the 4x8 DCT
452
81.2M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;  // weighted mean of the three, with the 4x8 term counted twice
453
81.2M
  coefficients[1] = (block00 - block01) * 0.5f;  // half the difference between the AFV and 4x4 terms
454
81.2M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;  // AFV + 4x4 terms minus twice the 4x8 term, over 4
455
81.2M
}
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
293k
                            float* JXL_RESTRICT coefficients) {
411
293k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
293k
  size_t afv_x = afv_kind & 1;
413
293k
  size_t afv_y = afv_kind / 2;
414
293k
  HWY_ALIGN float block[4 * 8] = {};
415
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.86M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.69M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.69M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.69M
    }
420
1.17M
  }
421
  // AFV coefficients in (even, even) positions.
422
293k
  HWY_ALIGN float coeff[4 * 4];
423
293k
  AFVDCT4x4(block, coeff);
424
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.86M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.69M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.69M
    }
428
1.17M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
293k
  ComputeScaledDCT<4, 4>()(
431
293k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
293k
              pixels_stride),
433
293k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.38M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.38M
    }
439
1.17M
  }
440
  // 4x8 DCT of the other half of the block.
441
293k
  ComputeScaledDCT<4, 8>()(
442
293k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
293k
      block, scratch_space);
444
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.38M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.38M
    }
448
1.17M
  }
449
293k
  float block00 = coefficients[0] * 0.25f;
450
293k
  float block01 = coefficients[1];
451
293k
  float block10 = coefficients[8];
452
293k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
293k
  coefficients[1] = (block00 - block01) * 0.5f;
454
293k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
293k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
655k
                            float* JXL_RESTRICT coefficients) {
411
655k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
655k
  size_t afv_x = afv_kind & 1;
413
655k
  size_t afv_y = afv_kind / 2;
414
655k
  HWY_ALIGN float block[4 * 8] = {};
415
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
416
13.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
10.4M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
10.4M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
10.4M
    }
420
2.62M
  }
421
  // AFV coefficients in (even, even) positions.
422
655k
  HWY_ALIGN float coeff[4 * 4];
423
655k
  AFVDCT4x4(block, coeff);
424
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
425
13.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
10.4M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
10.4M
    }
428
2.62M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
655k
  ComputeScaledDCT<4, 4>()(
431
655k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
655k
              pixels_stride),
433
655k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
436
23.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
20.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
20.9M
    }
439
2.62M
  }
440
  // 4x8 DCT of the other half of the block.
441
655k
  ComputeScaledDCT<4, 8>()(
442
655k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
655k
      block, scratch_space);
444
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
445
23.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
20.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
20.9M
    }
448
2.62M
  }
449
655k
  float block00 = coefficients[0] * 0.25f;
450
655k
  float block01 = coefficients[1];
451
655k
  float block10 = coefficients[8];
452
655k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
655k
  coefficients[1] = (block00 - block01) * 0.5f;
454
655k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
655k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
236k
                            float* JXL_RESTRICT coefficients) {
411
236k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
236k
  size_t afv_x = afv_kind & 1;
413
236k
  size_t afv_y = afv_kind / 2;
414
236k
  HWY_ALIGN float block[4 * 8] = {};
415
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.73M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.79M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.79M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.79M
    }
420
947k
  }
421
  // AFV coefficients in (even, even) positions.
422
236k
  HWY_ALIGN float coeff[4 * 4];
423
236k
  AFVDCT4x4(block, coeff);
424
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.73M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.79M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.79M
    }
428
947k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
236k
  ComputeScaledDCT<4, 4>()(
431
236k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
236k
              pixels_stride),
433
236k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.53M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.58M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.58M
    }
439
947k
  }
440
  // 4x8 DCT of the other half of the block.
441
236k
  ComputeScaledDCT<4, 8>()(
442
236k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
236k
      block, scratch_space);
444
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.53M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.58M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.58M
    }
448
947k
  }
449
236k
  float block00 = coefficients[0] * 0.25f;
450
236k
  float block01 = coefficients[1];
451
236k
  float block10 = coefficients[8];
452
236k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
236k
  coefficients[1] = (block00 - block01) * 0.5f;
454
236k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
236k
}
enc_group.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
376k
                            float* JXL_RESTRICT coefficients) {
411
376k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
376k
  size_t afv_x = afv_kind & 1;
413
376k
  size_t afv_y = afv_kind / 2;
414
376k
  HWY_ALIGN float block[4 * 8] = {};
415
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.52M
    for (size_t ix = 0; ix < 4; ix++) {
417
6.02M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
6.02M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
6.02M
    }
420
1.50M
  }
421
  // AFV coefficients in (even, even) positions.
422
376k
  HWY_ALIGN float coeff[4 * 4];
423
376k
  AFVDCT4x4(block, coeff);
424
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.52M
    for (size_t ix = 0; ix < 4; ix++) {
426
6.02M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
6.02M
    }
428
1.50M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
376k
  ComputeScaledDCT<4, 4>()(
431
376k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
376k
              pixels_stride),
433
376k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
12.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
12.0M
    }
439
1.50M
  }
440
  // 4x8 DCT of the other half of the block.
441
376k
  ComputeScaledDCT<4, 8>()(
442
376k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
376k
      block, scratch_space);
444
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
12.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
12.0M
    }
448
1.50M
  }
449
376k
  float block00 = coefficients[0] * 0.25f;
450
376k
  float block01 = coefficients[1];
451
376k
  float block10 = coefficients[8];
452
376k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
376k
  coefficients[1] = (block00 - block01) * 0.5f;
454
376k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
376k
}
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
19.5M
                            float* JXL_RESTRICT coefficients) {
411
19.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
19.5M
  size_t afv_x = afv_kind & 1;
413
19.5M
  size_t afv_y = afv_kind / 2;
414
19.5M
  HWY_ALIGN float block[4 * 8] = {};
415
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
390M
    for (size_t ix = 0; ix < 4; ix++) {
417
312M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
312M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
312M
    }
420
78.0M
  }
421
  // AFV coefficients in (even, even) positions.
422
19.5M
  HWY_ALIGN float coeff[4 * 4];
423
19.5M
  AFVDCT4x4(block, coeff);
424
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
390M
    for (size_t ix = 0; ix < 4; ix++) {
426
312M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
312M
    }
428
78.0M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
19.5M
  ComputeScaledDCT<4, 4>()(
431
19.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
19.5M
              pixels_stride),
433
19.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
702M
    for (size_t ix = 0; ix < 8; ix++) {
437
624M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
624M
    }
439
78.0M
  }
440
  // 4x8 DCT of the other half of the block.
441
19.5M
  ComputeScaledDCT<4, 8>()(
442
19.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
19.5M
      block, scratch_space);
444
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
702M
    for (size_t ix = 0; ix < 8; ix++) {
446
624M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
624M
    }
448
78.0M
  }
449
19.5M
  float block00 = coefficients[0] * 0.25f;
450
19.5M
  float block01 = coefficients[1];
451
19.5M
  float block10 = coefficients[8];
452
19.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
19.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
19.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
19.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
19.5M
                            float* JXL_RESTRICT coefficients) {
411
19.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
19.5M
  size_t afv_x = afv_kind & 1;
413
19.5M
  size_t afv_y = afv_kind / 2;
414
19.5M
  HWY_ALIGN float block[4 * 8] = {};
415
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
390M
    for (size_t ix = 0; ix < 4; ix++) {
417
312M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
312M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
312M
    }
420
78.0M
  }
421
  // AFV coefficients in (even, even) positions.
422
19.5M
  HWY_ALIGN float coeff[4 * 4];
423
19.5M
  AFVDCT4x4(block, coeff);
424
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
390M
    for (size_t ix = 0; ix < 4; ix++) {
426
312M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
312M
    }
428
78.0M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
19.5M
  ComputeScaledDCT<4, 4>()(
431
19.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
19.5M
              pixels_stride),
433
19.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
702M
    for (size_t ix = 0; ix < 8; ix++) {
437
624M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
624M
    }
439
78.0M
  }
440
  // 4x8 DCT of the other half of the block.
441
19.5M
  ComputeScaledDCT<4, 8>()(
442
19.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
19.5M
      block, scratch_space);
444
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
702M
    for (size_t ix = 0; ix < 8; ix++) {
446
624M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
624M
    }
448
78.0M
  }
449
19.5M
  float block00 = coefficients[0] * 0.25f;
450
19.5M
  float block01 = coefficients[1];
451
19.5M
  float block10 = coefficients[8];
452
19.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
19.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
19.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
19.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
19.5M
                            float* JXL_RESTRICT coefficients) {
411
19.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
19.5M
  size_t afv_x = afv_kind & 1;
413
19.5M
  size_t afv_y = afv_kind / 2;
414
19.5M
  HWY_ALIGN float block[4 * 8] = {};
415
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
390M
    for (size_t ix = 0; ix < 4; ix++) {
417
312M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
312M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
312M
    }
420
78.0M
  }
421
  // AFV coefficients in (even, even) positions.
422
19.5M
  HWY_ALIGN float coeff[4 * 4];
423
19.5M
  AFVDCT4x4(block, coeff);
424
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
390M
    for (size_t ix = 0; ix < 4; ix++) {
426
312M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
312M
    }
428
78.0M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
19.5M
  ComputeScaledDCT<4, 4>()(
431
19.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
19.5M
              pixels_stride),
433
19.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
702M
    for (size_t ix = 0; ix < 8; ix++) {
437
624M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
624M
    }
439
78.0M
  }
440
  // 4x8 DCT of the other half of the block.
441
19.5M
  ComputeScaledDCT<4, 8>()(
442
19.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
19.5M
      block, scratch_space);
444
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
702M
    for (size_t ix = 0; ix < 8; ix++) {
446
624M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
624M
    }
448
78.0M
  }
449
19.5M
  float block00 = coefficients[0] * 0.25f;
450
19.5M
  float block01 = coefficients[1];
451
19.5M
  float block10 = coefficients[8];
452
19.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
19.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
19.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
19.5M
}
enc_ac_strategy.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
19.5M
                            float* JXL_RESTRICT coefficients) {
411
19.5M
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
19.5M
  size_t afv_x = afv_kind & 1;
413
19.5M
  size_t afv_y = afv_kind / 2;
414
19.5M
  HWY_ALIGN float block[4 * 8] = {};
415
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
416
390M
    for (size_t ix = 0; ix < 4; ix++) {
417
312M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
312M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
312M
    }
420
78.0M
  }
421
  // AFV coefficients in (even, even) positions.
422
19.5M
  HWY_ALIGN float coeff[4 * 4];
423
19.5M
  AFVDCT4x4(block, coeff);
424
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
425
390M
    for (size_t ix = 0; ix < 4; ix++) {
426
312M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
312M
    }
428
78.0M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
19.5M
  ComputeScaledDCT<4, 4>()(
431
19.5M
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
19.5M
              pixels_stride),
433
19.5M
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
436
702M
    for (size_t ix = 0; ix < 8; ix++) {
437
624M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
624M
    }
439
78.0M
  }
440
  // 4x8 DCT of the other half of the block.
441
19.5M
  ComputeScaledDCT<4, 8>()(
442
19.5M
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
19.5M
      block, scratch_space);
444
97.6M
  for (size_t iy = 0; iy < 4; iy++) {
445
702M
    for (size_t ix = 0; ix < 8; ix++) {
446
624M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
624M
    }
448
78.0M
  }
449
19.5M
  float block00 = coefficients[0] * 0.25f;
450
19.5M
  float block01 = coefficients[1];
451
19.5M
  float block10 = coefficients[8];
452
19.5M
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
19.5M
  coefficients[1] = (block00 - block01) * 0.5f;
454
19.5M
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
19.5M
}
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Line
Count
Source
410
293k
                            float* JXL_RESTRICT coefficients) {
411
293k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
293k
  size_t afv_x = afv_kind & 1;
413
293k
  size_t afv_y = afv_kind / 2;
414
293k
  HWY_ALIGN float block[4 * 8] = {};
415
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
416
5.86M
    for (size_t ix = 0; ix < 4; ix++) {
417
4.69M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
4.69M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
4.69M
    }
420
1.17M
  }
421
  // AFV coefficients in (even, even) positions.
422
293k
  HWY_ALIGN float coeff[4 * 4];
423
293k
  AFVDCT4x4(block, coeff);
424
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
425
5.86M
    for (size_t ix = 0; ix < 4; ix++) {
426
4.69M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
4.69M
    }
428
1.17M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
293k
  ComputeScaledDCT<4, 4>()(
431
293k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
293k
              pixels_stride),
433
293k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
436
10.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
9.38M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
9.38M
    }
439
1.17M
  }
440
  // 4x8 DCT of the other half of the block.
441
293k
  ComputeScaledDCT<4, 8>()(
442
293k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
293k
      block, scratch_space);
444
1.46M
  for (size_t iy = 0; iy < 4; iy++) {
445
10.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
9.38M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
9.38M
    }
448
1.17M
  }
449
293k
  float block00 = coefficients[0] * 0.25f;
450
293k
  float block01 = coefficients[1];
451
293k
  float block10 = coefficients[8];
452
293k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
293k
  coefficients[1] = (block00 - block01) * 0.5f;
454
293k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
293k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Line
Count
Source
410
655k
                            float* JXL_RESTRICT coefficients) {
411
655k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
655k
  size_t afv_x = afv_kind & 1;
413
655k
  size_t afv_y = afv_kind / 2;
414
655k
  HWY_ALIGN float block[4 * 8] = {};
415
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
416
13.1M
    for (size_t ix = 0; ix < 4; ix++) {
417
10.4M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
10.4M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
10.4M
    }
420
2.62M
  }
421
  // AFV coefficients in (even, even) positions.
422
655k
  HWY_ALIGN float coeff[4 * 4];
423
655k
  AFVDCT4x4(block, coeff);
424
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
425
13.1M
    for (size_t ix = 0; ix < 4; ix++) {
426
10.4M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
10.4M
    }
428
2.62M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
655k
  ComputeScaledDCT<4, 4>()(
431
655k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
655k
              pixels_stride),
433
655k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
436
23.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
20.9M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
20.9M
    }
439
2.62M
  }
440
  // 4x8 DCT of the other half of the block.
441
655k
  ComputeScaledDCT<4, 8>()(
442
655k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
655k
      block, scratch_space);
444
3.27M
  for (size_t iy = 0; iy < 4; iy++) {
445
23.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
20.9M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
20.9M
    }
448
2.62M
  }
449
655k
  float block00 = coefficients[0] * 0.25f;
450
655k
  float block01 = coefficients[1];
451
655k
  float block10 = coefficients[8];
452
655k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
655k
  coefficients[1] = (block00 - block01) * 0.5f;
454
655k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
655k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Line
Count
Source
410
236k
                            float* JXL_RESTRICT coefficients) {
411
236k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
236k
  size_t afv_x = afv_kind & 1;
413
236k
  size_t afv_y = afv_kind / 2;
414
236k
  HWY_ALIGN float block[4 * 8] = {};
415
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
416
4.73M
    for (size_t ix = 0; ix < 4; ix++) {
417
3.79M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
3.79M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
3.79M
    }
420
947k
  }
421
  // AFV coefficients in (even, even) positions.
422
236k
  HWY_ALIGN float coeff[4 * 4];
423
236k
  AFVDCT4x4(block, coeff);
424
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
425
4.73M
    for (size_t ix = 0; ix < 4; ix++) {
426
3.79M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
3.79M
    }
428
947k
  }
429
  // 4x4 DCT of the block with same y and different x.
430
236k
  ComputeScaledDCT<4, 4>()(
431
236k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
236k
              pixels_stride),
433
236k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
436
8.53M
    for (size_t ix = 0; ix < 8; ix++) {
437
7.58M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
7.58M
    }
439
947k
  }
440
  // 4x8 DCT of the other half of the block.
441
236k
  ComputeScaledDCT<4, 8>()(
442
236k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
236k
      block, scratch_space);
444
1.18M
  for (size_t iy = 0; iy < 4; iy++) {
445
8.53M
    for (size_t ix = 0; ix < 8; ix++) {
446
7.58M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
7.58M
    }
448
947k
  }
449
236k
  float block00 = coefficients[0] * 0.25f;
450
236k
  float block01 = coefficients[1];
451
236k
  float block10 = coefficients[8];
452
236k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
236k
  coefficients[1] = (block00 - block01) * 0.5f;
454
236k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
236k
}
enc_chroma_from_luma.cc:void jxl::N_AVX2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Line
Count
Source
410
376k
                            float* JXL_RESTRICT coefficients) {
411
376k
  HWY_ALIGN float scratch_space[4 * 8 * 5];
412
376k
  size_t afv_x = afv_kind & 1;
413
376k
  size_t afv_y = afv_kind / 2;
414
376k
  HWY_ALIGN float block[4 * 8] = {};
415
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
416
7.52M
    for (size_t ix = 0; ix < 4; ix++) {
417
6.02M
      block[(afv_y == 1 ? 3 - iy : iy) * 4 + (afv_x == 1 ? 3 - ix : ix)] =
418
6.02M
          pixels[(iy + 4 * afv_y) * pixels_stride + ix + 4 * afv_x];
419
6.02M
    }
420
1.50M
  }
421
  // AFV coefficients in (even, even) positions.
422
376k
  HWY_ALIGN float coeff[4 * 4];
423
376k
  AFVDCT4x4(block, coeff);
424
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
425
7.52M
    for (size_t ix = 0; ix < 4; ix++) {
426
6.02M
      coefficients[iy * 2 * 8 + ix * 2] = coeff[iy * 4 + ix];
427
6.02M
    }
428
1.50M
  }
429
  // 4x4 DCT of the block with same y and different x.
430
376k
  ComputeScaledDCT<4, 4>()(
431
376k
      DCTFrom(pixels + afv_y * 4 * pixels_stride + (afv_x == 1 ? 0 : 4),
432
376k
              pixels_stride),
433
376k
      block, scratch_space);
434
  // ... in (odd, even) positions.
435
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
436
13.5M
    for (size_t ix = 0; ix < 8; ix++) {
437
12.0M
      coefficients[iy * 2 * 8 + ix * 2 + 1] = block[iy * 4 + ix];
438
12.0M
    }
439
1.50M
  }
440
  // 4x8 DCT of the other half of the block.
441
376k
  ComputeScaledDCT<4, 8>()(
442
376k
      DCTFrom(pixels + (afv_y == 1 ? 0 : 4) * pixels_stride, pixels_stride),
443
376k
      block, scratch_space);
444
1.88M
  for (size_t iy = 0; iy < 4; iy++) {
445
13.5M
    for (size_t ix = 0; ix < 8; ix++) {
446
12.0M
      coefficients[(1 + iy * 2) * 8 + ix] = block[iy * 8 + ix];
447
12.0M
    }
448
1.50M
  }
449
376k
  float block00 = coefficients[0] * 0.25f;
450
376k
  float block01 = coefficients[1];
451
376k
  float block10 = coefficients[8];
452
376k
  coefficients[0] = (block00 + block01 + 2 * block10) * 0.25f;
453
376k
  coefficients[1] = (block00 - block01) * 0.5f;
454
376k
  coefficients[8] = (block00 + block01 - 2 * block10) * 0.25f;
455
376k
}
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_ZEN4::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_AVX3_SPR::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<0ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<1ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<2ul>(float const*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:void jxl::N_SSE2::(anonymous namespace)::AFVTransformFromPixels<3ul>(float const*, unsigned long, float*)
456
457
HWY_MAYBE_UNUSED void TransformFromPixels(const AcStrategyType strategy,
458
                                          const float* JXL_RESTRICT pixels,
459
                                          size_t pixels_stride,
460
                                          float* JXL_RESTRICT coefficients,
461
280M
                                          float* JXL_RESTRICT scratch_space) {
462
280M
  using Type = AcStrategyType;
463
280M
  switch (strategy) {
464
21.6M
    case Type::IDENTITY: {
465
64.8M
      for (size_t y = 0; y < 2; y++) {
466
129M
        for (size_t x = 0; x < 2; x++) {
467
86.4M
          float block_dc = 0;
468
432M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.72G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.38G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.38G
            }
472
345M
          }
473
86.4M
          block_dc *= 1.0f / 16;
474
432M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.72G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.38G
              if (ix == 1 && iy == 1) continue;
477
1.29G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.29G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.29G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.29G
            }
481
345M
          }
482
86.4M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
86.4M
          coefficients[y * 8 + x] = block_dc;
484
86.4M
        }
485
43.2M
      }
486
21.6M
      float block00 = coefficients[0];
487
21.6M
      float block01 = coefficients[1];
488
21.6M
      float block10 = coefficients[8];
489
21.6M
      float block11 = coefficients[9];
490
21.6M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
21.6M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
21.6M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
21.6M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
21.6M
      break;
495
0
    }
496
20.4M
    case Type::DCT8X4: {
497
61.3M
      for (size_t x = 0; x < 2; x++) {
498
40.8M
        HWY_ALIGN float block[4 * 8];
499
40.8M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
40.8M
                                 scratch_space);
501
204M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.47G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.30G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.30G
          }
506
163M
        }
507
40.8M
      }
508
20.4M
      float block0 = coefficients[0];
509
20.4M
      float block1 = coefficients[8];
510
20.4M
      coefficients[0] = (block0 + block1) * 0.5f;
511
20.4M
      coefficients[8] = (block0 - block1) * 0.5f;
512
20.4M
      break;
513
0
    }
514
20.1M
    case Type::DCT4X8: {
515
60.4M
      for (size_t y = 0; y < 2; y++) {
516
40.3M
        HWY_ALIGN float block[4 * 8];
517
40.3M
        ComputeScaledDCT<4, 8>()(
518
40.3M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
40.3M
            scratch_space);
520
201M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.45G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.28G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.28G
          }
524
161M
        }
525
40.3M
      }
526
20.1M
      float block0 = coefficients[0];
527
20.1M
      float block1 = coefficients[8];
528
20.1M
      coefficients[0] = (block0 + block1) * 0.5f;
529
20.1M
      coefficients[8] = (block0 - block1) * 0.5f;
530
20.1M
      break;
531
0
    }
532
19.5M
    case Type::DCT4X4: {
533
58.5M
      for (size_t y = 0; y < 2; y++) {
534
117M
        for (size_t x = 0; x < 2; x++) {
535
78.1M
          HWY_ALIGN float block[4 * 4];
536
78.1M
          ComputeScaledDCT<4, 4>()(
537
78.1M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
78.1M
              block, scratch_space);
539
390M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.56G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.24G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.24G
            }
543
312M
          }
544
78.1M
        }
545
39.0M
      }
546
19.5M
      float block00 = coefficients[0];
547
19.5M
      float block01 = coefficients[1];
548
19.5M
      float block10 = coefficients[8];
549
19.5M
      float block11 = coefficients[9];
550
19.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
19.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
19.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
19.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
19.5M
      break;
555
0
    }
556
21.5M
    case Type::DCT2X2: {
557
21.5M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
21.5M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
21.5M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
21.5M
      break;
561
0
    }
562
8.08M
    case Type::DCT16X16: {
563
8.08M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
8.08M
                                 scratch_space);
565
8.08M
      break;
566
0
    }
567
15.7M
    case Type::DCT16X8: {
568
15.7M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
15.7M
                                scratch_space);
570
15.7M
      break;
571
0
    }
572
16.1M
    case Type::DCT8X16: {
573
16.1M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
16.1M
                                scratch_space);
575
16.1M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.20M
    case Type::DCT32X16: {
588
3.20M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.20M
                                 scratch_space);
590
3.20M
      break;
591
0
    }
592
3.32M
    case Type::DCT16X32: {
593
3.32M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.32M
                                 scratch_space);
595
3.32M
      break;
596
0
    }
597
2.02M
    case Type::DCT32X32: {
598
2.02M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
2.02M
                                 scratch_space);
600
2.02M
      break;
601
0
    }
602
45.1M
    case Type::DCT: {
603
45.1M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
45.1M
                               scratch_space);
605
45.1M
      break;
606
0
    }
607
20.1M
    case Type::AFV0: {
608
20.1M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
20.1M
      break;
610
0
    }
611
20.8M
    case Type::AFV1: {
612
20.8M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
20.8M
      break;
614
0
    }
615
19.9M
    case Type::AFV2: {
616
19.9M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
19.9M
      break;
618
0
    }
619
20.2M
    case Type::AFV3: {
620
20.2M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
20.2M
      break;
622
0
    }
623
371k
    case Type::DCT64X64: {
624
371k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
371k
                                 scratch_space);
626
371k
      break;
627
0
    }
628
996k
    case Type::DCT64X32: {
629
996k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
996k
                                 scratch_space);
631
996k
      break;
632
0
    }
633
667k
    case Type::DCT32X64: {
634
667k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
667k
                                 scratch_space);
636
667k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
280M
  }
669
280M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
8.91M
                                          float* JXL_RESTRICT scratch_space) {
462
8.91M
  using Type = AcStrategyType;
463
8.91M
  switch (strategy) {
464
1.04M
    case Type::IDENTITY: {
465
3.14M
      for (size_t y = 0; y < 2; y++) {
466
6.28M
        for (size_t x = 0; x < 2; x++) {
467
4.18M
          float block_dc = 0;
468
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
83.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
66.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
66.9M
            }
472
16.7M
          }
473
4.18M
          block_dc *= 1.0f / 16;
474
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
83.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
66.9M
              if (ix == 1 && iy == 1) continue;
477
62.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
62.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
62.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
62.8M
            }
481
16.7M
          }
482
4.18M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.18M
          coefficients[y * 8 + x] = block_dc;
484
4.18M
        }
485
2.09M
      }
486
1.04M
      float block00 = coefficients[0];
487
1.04M
      float block01 = coefficients[1];
488
1.04M
      float block10 = coefficients[8];
489
1.04M
      float block11 = coefficients[9];
490
1.04M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.04M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.04M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.04M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.04M
      break;
495
0
    }
496
455k
    case Type::DCT8X4: {
497
1.36M
      for (size_t x = 0; x < 2; x++) {
498
910k
        HWY_ALIGN float block[4 * 8];
499
910k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
910k
                                 scratch_space);
501
4.55M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
29.1M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
29.1M
          }
506
3.64M
        }
507
910k
      }
508
455k
      float block0 = coefficients[0];
509
455k
      float block1 = coefficients[8];
510
455k
      coefficients[0] = (block0 + block1) * 0.5f;
511
455k
      coefficients[8] = (block0 - block1) * 0.5f;
512
455k
      break;
513
0
    }
514
313k
    case Type::DCT4X8: {
515
940k
      for (size_t y = 0; y < 2; y++) {
516
626k
        HWY_ALIGN float block[4 * 8];
517
626k
        ComputeScaledDCT<4, 8>()(
518
626k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
626k
            scratch_space);
520
3.13M
        for (size_t iy = 0; iy < 4; iy++) {
521
22.5M
          for (size_t ix = 0; ix < 8; ix++) {
522
20.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
20.0M
          }
524
2.50M
        }
525
626k
      }
526
313k
      float block0 = coefficients[0];
527
313k
      float block1 = coefficients[8];
528
313k
      coefficients[0] = (block0 + block1) * 0.5f;
529
313k
      coefficients[8] = (block0 - block1) * 0.5f;
530
313k
      break;
531
0
    }
532
2.43k
    case Type::DCT4X4: {
533
7.30k
      for (size_t y = 0; y < 2; y++) {
534
14.6k
        for (size_t x = 0; x < 2; x++) {
535
9.74k
          HWY_ALIGN float block[4 * 4];
536
9.74k
          ComputeScaledDCT<4, 4>()(
537
9.74k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.74k
              block, scratch_space);
539
48.7k
          for (size_t iy = 0; iy < 4; iy++) {
540
194k
            for (size_t ix = 0; ix < 4; ix++) {
541
155k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
155k
            }
543
38.9k
          }
544
9.74k
        }
545
4.87k
      }
546
2.43k
      float block00 = coefficients[0];
547
2.43k
      float block01 = coefficients[1];
548
2.43k
      float block10 = coefficients[8];
549
2.43k
      float block11 = coefficients[9];
550
2.43k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.43k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.43k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.43k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.43k
      break;
555
0
    }
556
1.01M
    case Type::DCT2X2: {
557
1.01M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.01M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.01M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.01M
      break;
561
0
    }
562
169k
    case Type::DCT16X16: {
563
169k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
169k
                                 scratch_space);
565
169k
      break;
566
0
    }
567
297k
    case Type::DCT16X8: {
568
297k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
297k
                                scratch_space);
570
297k
      break;
571
0
    }
572
444k
    case Type::DCT8X16: {
573
444k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
444k
                                scratch_space);
575
444k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
93.4k
    case Type::DCT32X16: {
588
93.4k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
93.4k
                                 scratch_space);
590
93.4k
      break;
591
0
    }
592
143k
    case Type::DCT16X32: {
593
143k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
143k
                                 scratch_space);
595
143k
      break;
596
0
    }
597
244k
    case Type::DCT32X32: {
598
244k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
244k
                                 scratch_space);
600
244k
      break;
601
0
    }
602
3.05M
    case Type::DCT: {
603
3.05M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
3.05M
                               scratch_space);
605
3.05M
      break;
606
0
    }
607
293k
    case Type::AFV0: {
608
293k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
293k
      break;
610
0
    }
611
655k
    case Type::AFV1: {
612
655k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
655k
      break;
614
0
    }
615
236k
    case Type::AFV2: {
616
236k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
236k
      break;
618
0
    }
619
376k
    case Type::AFV3: {
620
376k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
376k
      break;
622
0
    }
623
54.6k
    case Type::DCT64X64: {
624
54.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
54.6k
                                 scratch_space);
626
54.6k
      break;
627
0
    }
628
8.17k
    case Type::DCT64X32: {
629
8.17k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
8.17k
                                 scratch_space);
631
8.17k
      break;
632
0
    }
633
11.2k
    case Type::DCT32X64: {
634
11.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
11.2k
                                 scratch_space);
636
11.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
8.91M
  }
669
8.91M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
242M
                                          float* JXL_RESTRICT scratch_space) {
462
242M
  using Type = AcStrategyType;
463
242M
  switch (strategy) {
464
19.5M
    case Type::IDENTITY: {
465
58.5M
      for (size_t y = 0; y < 2; y++) {
466
117M
        for (size_t x = 0; x < 2; x++) {
467
78.0M
          float block_dc = 0;
468
390M
          for (size_t iy = 0; iy < 4; iy++) {
469
1.56G
            for (size_t ix = 0; ix < 4; ix++) {
470
1.24G
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
1.24G
            }
472
312M
          }
473
78.0M
          block_dc *= 1.0f / 16;
474
390M
          for (size_t iy = 0; iy < 4; iy++) {
475
1.56G
            for (size_t ix = 0; ix < 4; ix++) {
476
1.24G
              if (ix == 1 && iy == 1) continue;
477
1.17G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
1.17G
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
1.17G
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
1.17G
            }
481
312M
          }
482
78.0M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
78.0M
          coefficients[y * 8 + x] = block_dc;
484
78.0M
        }
485
39.0M
      }
486
19.5M
      float block00 = coefficients[0];
487
19.5M
      float block01 = coefficients[1];
488
19.5M
      float block10 = coefficients[8];
489
19.5M
      float block11 = coefficients[9];
490
19.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
19.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
19.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
19.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
19.5M
      break;
495
0
    }
496
19.5M
    case Type::DCT8X4: {
497
58.5M
      for (size_t x = 0; x < 2; x++) {
498
39.0M
        HWY_ALIGN float block[4 * 8];
499
39.0M
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
39.0M
                                 scratch_space);
501
195M
        for (size_t iy = 0; iy < 4; iy++) {
502
1.40G
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
1.24G
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
1.24G
          }
506
156M
        }
507
39.0M
      }
508
19.5M
      float block0 = coefficients[0];
509
19.5M
      float block1 = coefficients[8];
510
19.5M
      coefficients[0] = (block0 + block1) * 0.5f;
511
19.5M
      coefficients[8] = (block0 - block1) * 0.5f;
512
19.5M
      break;
513
0
    }
514
19.5M
    case Type::DCT4X8: {
515
58.5M
      for (size_t y = 0; y < 2; y++) {
516
39.0M
        HWY_ALIGN float block[4 * 8];
517
39.0M
        ComputeScaledDCT<4, 8>()(
518
39.0M
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
39.0M
            scratch_space);
520
195M
        for (size_t iy = 0; iy < 4; iy++) {
521
1.40G
          for (size_t ix = 0; ix < 8; ix++) {
522
1.24G
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
1.24G
          }
524
156M
        }
525
39.0M
      }
526
19.5M
      float block0 = coefficients[0];
527
19.5M
      float block1 = coefficients[8];
528
19.5M
      coefficients[0] = (block0 + block1) * 0.5f;
529
19.5M
      coefficients[8] = (block0 - block1) * 0.5f;
530
19.5M
      break;
531
0
    }
532
19.5M
    case Type::DCT4X4: {
533
58.5M
      for (size_t y = 0; y < 2; y++) {
534
117M
        for (size_t x = 0; x < 2; x++) {
535
78.0M
          HWY_ALIGN float block[4 * 4];
536
78.0M
          ComputeScaledDCT<4, 4>()(
537
78.0M
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
78.0M
              block, scratch_space);
539
390M
          for (size_t iy = 0; iy < 4; iy++) {
540
1.56G
            for (size_t ix = 0; ix < 4; ix++) {
541
1.24G
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
1.24G
            }
543
312M
          }
544
78.0M
        }
545
39.0M
      }
546
19.5M
      float block00 = coefficients[0];
547
19.5M
      float block01 = coefficients[1];
548
19.5M
      float block10 = coefficients[8];
549
19.5M
      float block11 = coefficients[9];
550
19.5M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
19.5M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
19.5M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
19.5M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
19.5M
      break;
555
0
    }
556
19.5M
    case Type::DCT2X2: {
557
19.5M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
19.5M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
19.5M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
19.5M
      break;
561
0
    }
562
7.74M
    case Type::DCT16X16: {
563
7.74M
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
7.74M
                                 scratch_space);
565
7.74M
      break;
566
0
    }
567
15.1M
    case Type::DCT16X8: {
568
15.1M
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
15.1M
                                scratch_space);
570
15.1M
      break;
571
0
    }
572
15.2M
    case Type::DCT8X16: {
573
15.2M
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
15.2M
                                scratch_space);
575
15.2M
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
3.01M
    case Type::DCT32X16: {
588
3.01M
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
3.01M
                                 scratch_space);
590
3.01M
      break;
591
0
    }
592
3.03M
    case Type::DCT16X32: {
593
3.03M
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
3.03M
                                 scratch_space);
595
3.03M
      break;
596
0
    }
597
1.53M
    case Type::DCT32X32: {
598
1.53M
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
1.53M
                                 scratch_space);
600
1.53M
      break;
601
0
    }
602
19.5M
    case Type::DCT: {
603
19.5M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
19.5M
                               scratch_space);
605
19.5M
      break;
606
0
    }
607
19.5M
    case Type::AFV0: {
608
19.5M
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
19.5M
      break;
610
0
    }
611
19.5M
    case Type::AFV1: {
612
19.5M
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
19.5M
      break;
614
0
    }
615
19.5M
    case Type::AFV2: {
616
19.5M
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
19.5M
      break;
618
0
    }
619
19.5M
    case Type::AFV3: {
620
19.5M
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
19.5M
      break;
622
0
    }
623
261k
    case Type::DCT64X64: {
624
261k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
261k
                                 scratch_space);
626
261k
      break;
627
0
    }
628
980k
    case Type::DCT64X32: {
629
980k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
980k
                                 scratch_space);
631
980k
      break;
632
0
    }
633
644k
    case Type::DCT32X64: {
634
644k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
644k
                                 scratch_space);
636
644k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
242M
  }
669
242M
}
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Line
Count
Source
461
28.4M
                                          float* JXL_RESTRICT scratch_space) {
462
28.4M
  using Type = AcStrategyType;
463
28.4M
  switch (strategy) {
464
1.04M
    case Type::IDENTITY: {
465
3.14M
      for (size_t y = 0; y < 2; y++) {
466
6.28M
        for (size_t x = 0; x < 2; x++) {
467
4.18M
          float block_dc = 0;
468
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
469
83.7M
            for (size_t ix = 0; ix < 4; ix++) {
470
66.9M
              block_dc += pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix];
471
66.9M
            }
472
16.7M
          }
473
4.18M
          block_dc *= 1.0f / 16;
474
20.9M
          for (size_t iy = 0; iy < 4; iy++) {
475
83.7M
            for (size_t ix = 0; ix < 4; ix++) {
476
66.9M
              if (ix == 1 && iy == 1) continue;
477
62.8M
              coefficients[(y + iy * 2) * 8 + x + ix * 2] =
478
62.8M
                  pixels[(y * 4 + iy) * pixels_stride + x * 4 + ix] -
479
62.8M
                  pixels[(y * 4 + 1) * pixels_stride + x * 4 + 1];
480
62.8M
            }
481
16.7M
          }
482
4.18M
          coefficients[(y + 2) * 8 + x + 2] = coefficients[y * 8 + x];
483
4.18M
          coefficients[y * 8 + x] = block_dc;
484
4.18M
        }
485
2.09M
      }
486
1.04M
      float block00 = coefficients[0];
487
1.04M
      float block01 = coefficients[1];
488
1.04M
      float block10 = coefficients[8];
489
1.04M
      float block11 = coefficients[9];
490
1.04M
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
491
1.04M
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
492
1.04M
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
493
1.04M
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
494
1.04M
      break;
495
0
    }
496
455k
    case Type::DCT8X4: {
497
1.36M
      for (size_t x = 0; x < 2; x++) {
498
910k
        HWY_ALIGN float block[4 * 8];
499
910k
        ComputeScaledDCT<8, 4>()(DCTFrom(pixels + x * 4, pixels_stride), block,
500
910k
                                 scratch_space);
501
4.55M
        for (size_t iy = 0; iy < 4; iy++) {
502
32.7M
          for (size_t ix = 0; ix < 8; ix++) {
503
            // Store transposed.
504
29.1M
            coefficients[(x + iy * 2) * 8 + ix] = block[iy * 8 + ix];
505
29.1M
          }
506
3.64M
        }
507
910k
      }
508
455k
      float block0 = coefficients[0];
509
455k
      float block1 = coefficients[8];
510
455k
      coefficients[0] = (block0 + block1) * 0.5f;
511
455k
      coefficients[8] = (block0 - block1) * 0.5f;
512
455k
      break;
513
0
    }
514
313k
    case Type::DCT4X8: {
515
940k
      for (size_t y = 0; y < 2; y++) {
516
626k
        HWY_ALIGN float block[4 * 8];
517
626k
        ComputeScaledDCT<4, 8>()(
518
626k
            DCTFrom(pixels + y * 4 * pixels_stride, pixels_stride), block,
519
626k
            scratch_space);
520
3.13M
        for (size_t iy = 0; iy < 4; iy++) {
521
22.5M
          for (size_t ix = 0; ix < 8; ix++) {
522
20.0M
            coefficients[(y + iy * 2) * 8 + ix] = block[iy * 8 + ix];
523
20.0M
          }
524
2.50M
        }
525
626k
      }
526
313k
      float block0 = coefficients[0];
527
313k
      float block1 = coefficients[8];
528
313k
      coefficients[0] = (block0 + block1) * 0.5f;
529
313k
      coefficients[8] = (block0 - block1) * 0.5f;
530
313k
      break;
531
0
    }
532
2.43k
    case Type::DCT4X4: {
533
7.30k
      for (size_t y = 0; y < 2; y++) {
534
14.6k
        for (size_t x = 0; x < 2; x++) {
535
9.74k
          HWY_ALIGN float block[4 * 4];
536
9.74k
          ComputeScaledDCT<4, 4>()(
537
9.74k
              DCTFrom(pixels + y * 4 * pixels_stride + x * 4, pixels_stride),
538
9.74k
              block, scratch_space);
539
48.7k
          for (size_t iy = 0; iy < 4; iy++) {
540
194k
            for (size_t ix = 0; ix < 4; ix++) {
541
155k
              coefficients[(y + iy * 2) * 8 + x + ix * 2] = block[iy * 4 + ix];
542
155k
            }
543
38.9k
          }
544
9.74k
        }
545
4.87k
      }
546
2.43k
      float block00 = coefficients[0];
547
2.43k
      float block01 = coefficients[1];
548
2.43k
      float block10 = coefficients[8];
549
2.43k
      float block11 = coefficients[9];
550
2.43k
      coefficients[0] = (block00 + block01 + block10 + block11) * 0.25f;
551
2.43k
      coefficients[1] = (block00 + block01 - block10 - block11) * 0.25f;
552
2.43k
      coefficients[8] = (block00 - block01 + block10 - block11) * 0.25f;
553
2.43k
      coefficients[9] = (block00 - block01 - block10 + block11) * 0.25f;
554
2.43k
      break;
555
0
    }
556
1.01M
    case Type::DCT2X2: {
557
1.01M
      DCT2TopBlock<8>(pixels, pixels_stride, coefficients);
558
1.01M
      DCT2TopBlock<4>(coefficients, kBlockDim, coefficients);
559
1.01M
      DCT2TopBlock<2>(coefficients, kBlockDim, coefficients);
560
1.01M
      break;
561
0
    }
562
169k
    case Type::DCT16X16: {
563
169k
      ComputeScaledDCT<16, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
564
169k
                                 scratch_space);
565
169k
      break;
566
0
    }
567
297k
    case Type::DCT16X8: {
568
297k
      ComputeScaledDCT<16, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
569
297k
                                scratch_space);
570
297k
      break;
571
0
    }
572
444k
    case Type::DCT8X16: {
573
444k
      ComputeScaledDCT<8, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
574
444k
                                scratch_space);
575
444k
      break;
576
0
    }
577
0
    case Type::DCT32X8: {
578
0
      ComputeScaledDCT<32, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
579
0
                                scratch_space);
580
0
      break;
581
0
    }
582
0
    case Type::DCT8X32: {
583
0
      ComputeScaledDCT<8, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
584
0
                                scratch_space);
585
0
      break;
586
0
    }
587
93.4k
    case Type::DCT32X16: {
588
93.4k
      ComputeScaledDCT<32, 16>()(DCTFrom(pixels, pixels_stride), coefficients,
589
93.4k
                                 scratch_space);
590
93.4k
      break;
591
0
    }
592
143k
    case Type::DCT16X32: {
593
143k
      ComputeScaledDCT<16, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
594
143k
                                 scratch_space);
595
143k
      break;
596
0
    }
597
244k
    case Type::DCT32X32: {
598
244k
      ComputeScaledDCT<32, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
599
244k
                                 scratch_space);
600
244k
      break;
601
0
    }
602
22.5M
    case Type::DCT: {
603
22.5M
      ComputeScaledDCT<8, 8>()(DCTFrom(pixels, pixels_stride), coefficients,
604
22.5M
                               scratch_space);
605
22.5M
      break;
606
0
    }
607
293k
    case Type::AFV0: {
608
293k
      AFVTransformFromPixels<0>(pixels, pixels_stride, coefficients);
609
293k
      break;
610
0
    }
611
655k
    case Type::AFV1: {
612
655k
      AFVTransformFromPixels<1>(pixels, pixels_stride, coefficients);
613
655k
      break;
614
0
    }
615
236k
    case Type::AFV2: {
616
236k
      AFVTransformFromPixels<2>(pixels, pixels_stride, coefficients);
617
236k
      break;
618
0
    }
619
376k
    case Type::AFV3: {
620
376k
      AFVTransformFromPixels<3>(pixels, pixels_stride, coefficients);
621
376k
      break;
622
0
    }
623
54.6k
    case Type::DCT64X64: {
624
54.6k
      ComputeScaledDCT<64, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
625
54.6k
                                 scratch_space);
626
54.6k
      break;
627
0
    }
628
8.17k
    case Type::DCT64X32: {
629
8.17k
      ComputeScaledDCT<64, 32>()(DCTFrom(pixels, pixels_stride), coefficients,
630
8.17k
                                 scratch_space);
631
8.17k
      break;
632
0
    }
633
11.2k
    case Type::DCT32X64: {
634
11.2k
      ComputeScaledDCT<32, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
635
11.2k
                                 scratch_space);
636
11.2k
      break;
637
0
    }
638
0
    case Type::DCT128X128: {
639
0
      ComputeScaledDCT<128, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
640
0
                                   scratch_space);
641
0
      break;
642
0
    }
643
0
    case Type::DCT128X64: {
644
0
      ComputeScaledDCT<128, 64>()(DCTFrom(pixels, pixels_stride), coefficients,
645
0
                                  scratch_space);
646
0
      break;
647
0
    }
648
0
    case Type::DCT64X128: {
649
0
      ComputeScaledDCT<64, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
650
0
                                  scratch_space);
651
0
      break;
652
0
    }
653
0
    case Type::DCT256X256: {
654
0
      ComputeScaledDCT<256, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
655
0
                                   scratch_space);
656
0
      break;
657
0
    }
658
0
    case Type::DCT256X128: {
659
0
      ComputeScaledDCT<256, 128>()(DCTFrom(pixels, pixels_stride), coefficients,
660
0
                                   scratch_space);
661
0
      break;
662
0
    }
663
0
    case Type::DCT128X256: {
664
0
      ComputeScaledDCT<128, 256>()(DCTFrom(pixels, pixels_stride), coefficients,
665
0
                                   scratch_space);
666
0
      break;
667
0
    }
668
28.4M
  }
669
28.4M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::TransformFromPixels(jxl::AcStrategyType, float const*, unsigned long, float*, float*)
670
671
// `scratch_space` should be at least 4 * kMaxBlocks * kMaxBlocks elements.
672
HWY_MAYBE_UNUSED void DCFromLowestFrequencies(const AcStrategyType strategy,
673
                                              const float* block, float* dc,
674
                                              size_t dc_stride,
675
37.3M
                                              float* scratch_space) {
676
37.3M
  using Type = AcStrategyType;
677
37.3M
  switch (strategy) {
678
594k
    case Type::DCT16X8: {
679
594k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
594k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
594k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
594k
      break;
683
0
    }
684
889k
    case Type::DCT8X16: {
685
889k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
889k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
889k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
889k
      break;
689
0
    }
690
338k
    case Type::DCT16X16: {
691
338k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
338k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
338k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
338k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
186k
    case Type::DCT32X16: {
709
186k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
186k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
186k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
186k
      break;
713
0
    }
714
287k
    case Type::DCT16X32: {
715
287k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
287k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
287k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
287k
      break;
719
0
    }
720
488k
    case Type::DCT32X32: {
721
488k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
488k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
488k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
488k
      break;
725
0
    }
726
16.3k
    case Type::DCT64X32: {
727
16.3k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
16.3k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
16.3k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
16.3k
      break;
731
0
    }
732
22.4k
    case Type::DCT32X64: {
733
22.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
22.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
22.4k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
22.4k
      break;
737
0
    }
738
109k
    case Type::DCT64X64: {
739
109k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
109k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
109k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
109k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
25.6M
    case Type::DCT:
787
27.6M
    case Type::DCT2X2:
788
27.6M
    case Type::DCT4X4:
789
28.2M
    case Type::DCT4X8:
790
29.2M
    case Type::DCT8X4:
791
29.7M
    case Type::AFV0:
792
31.0M
    case Type::AFV1:
793
31.5M
    case Type::AFV2:
794
32.3M
    case Type::AFV3:
795
34.4M
    case Type::IDENTITY:
796
34.4M
      dc[0] = block[0];
797
34.4M
      break;
798
37.3M
  }
799
37.3M
}
Unexecuted instantiation: enc_group.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_group.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
8.91M
                                              float* scratch_space) {
676
8.91M
  using Type = AcStrategyType;
677
8.91M
  switch (strategy) {
678
297k
    case Type::DCT16X8: {
679
297k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
297k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
297k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
297k
      break;
683
0
    }
684
444k
    case Type::DCT8X16: {
685
444k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
444k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
444k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
444k
      break;
689
0
    }
690
169k
    case Type::DCT16X16: {
691
169k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
169k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
169k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
169k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
93.4k
    case Type::DCT32X16: {
709
93.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
93.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
93.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
93.4k
      break;
713
0
    }
714
143k
    case Type::DCT16X32: {
715
143k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
143k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
143k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
143k
      break;
719
0
    }
720
244k
    case Type::DCT32X32: {
721
244k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
244k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
244k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
244k
      break;
725
0
    }
726
8.17k
    case Type::DCT64X32: {
727
8.17k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
8.17k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
8.17k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
8.17k
      break;
731
0
    }
732
11.2k
    case Type::DCT32X64: {
733
11.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
11.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
11.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
11.2k
      break;
737
0
    }
738
54.6k
    case Type::DCT64X64: {
739
54.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
54.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
54.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
54.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
3.05M
    case Type::DCT:
787
4.06M
    case Type::DCT2X2:
788
4.06M
    case Type::DCT4X4:
789
4.38M
    case Type::DCT4X8:
790
4.83M
    case Type::DCT8X4:
791
5.13M
    case Type::AFV0:
792
5.78M
    case Type::AFV1:
793
6.02M
    case Type::AFV2:
794
6.39M
    case Type::AFV3:
795
7.44M
    case Type::IDENTITY:
796
7.44M
      dc[0] = block[0];
797
7.44M
      break;
798
8.91M
  }
799
8.91M
}
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_group.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_ac_strategy.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_adaptive_quantization.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
enc_chroma_from_luma.cc:jxl::N_AVX2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Line
Count
Source
675
28.4M
                                              float* scratch_space) {
676
28.4M
  using Type = AcStrategyType;
677
28.4M
  switch (strategy) {
678
297k
    case Type::DCT16X8: {
679
297k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/kBlockDim,
680
297k
                         /*LF_ROWS=*/2, /*LF_COLS=*/1, /*ROWS=*/2, /*COLS=*/1>(
681
297k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
682
297k
      break;
683
0
    }
684
444k
    case Type::DCT8X16: {
685
444k
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
686
444k
                         /*LF_ROWS=*/1, /*LF_COLS=*/2, /*ROWS=*/1, /*COLS=*/2>(
687
444k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
688
444k
      break;
689
0
    }
690
169k
    case Type::DCT16X16: {
691
169k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
692
169k
                         /*LF_ROWS=*/2, /*LF_COLS=*/2, /*ROWS=*/2, /*COLS=*/2>(
693
169k
          block, 2 * kBlockDim, dc, dc_stride, scratch_space);
694
169k
      break;
695
0
    }
696
0
    case Type::DCT32X8: {
697
0
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/kBlockDim,
698
0
                         /*LF_ROWS=*/4, /*LF_COLS=*/1, /*ROWS=*/4, /*COLS=*/1>(
699
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
700
0
      break;
701
0
    }
702
0
    case Type::DCT8X32: {
703
0
      ReinterpretingIDCT</*DCT_ROWS=*/kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
704
0
                         /*LF_ROWS=*/1, /*LF_COLS=*/4, /*ROWS=*/1, /*COLS=*/4>(
705
0
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
706
0
      break;
707
0
    }
708
93.4k
    case Type::DCT32X16: {
709
93.4k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/2 * kBlockDim,
710
93.4k
                         /*LF_ROWS=*/4, /*LF_COLS=*/2, /*ROWS=*/4, /*COLS=*/2>(
711
93.4k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
712
93.4k
      break;
713
0
    }
714
143k
    case Type::DCT16X32: {
715
143k
      ReinterpretingIDCT</*DCT_ROWS=*/2 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
716
143k
                         /*LF_ROWS=*/2, /*LF_COLS=*/4, /*ROWS=*/2, /*COLS=*/4>(
717
143k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
718
143k
      break;
719
0
    }
720
244k
    case Type::DCT32X32: {
721
244k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
722
244k
                         /*LF_ROWS=*/4, /*LF_COLS=*/4, /*ROWS=*/4, /*COLS=*/4>(
723
244k
          block, 4 * kBlockDim, dc, dc_stride, scratch_space);
724
244k
      break;
725
0
    }
726
8.17k
    case Type::DCT64X32: {
727
8.17k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/4 * kBlockDim,
728
8.17k
                         /*LF_ROWS=*/8, /*LF_COLS=*/4, /*ROWS=*/8, /*COLS=*/4>(
729
8.17k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
730
8.17k
      break;
731
0
    }
732
11.2k
    case Type::DCT32X64: {
733
11.2k
      ReinterpretingIDCT</*DCT_ROWS=*/4 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
734
11.2k
                         /*LF_ROWS=*/4, /*LF_COLS=*/8, /*ROWS=*/4, /*COLS=*/8>(
735
11.2k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
736
11.2k
      break;
737
0
    }
738
54.6k
    case Type::DCT64X64: {
739
54.6k
      ReinterpretingIDCT</*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
740
54.6k
                         /*LF_ROWS=*/8, /*LF_COLS=*/8, /*ROWS=*/8, /*COLS=*/8>(
741
54.6k
          block, 8 * kBlockDim, dc, dc_stride, scratch_space);
742
54.6k
      break;
743
0
    }
744
0
    case Type::DCT128X64: {
745
0
      ReinterpretingIDCT<
746
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/8 * kBlockDim,
747
0
          /*LF_ROWS=*/16, /*LF_COLS=*/8, /*ROWS=*/16, /*COLS=*/8>(
748
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
749
0
      break;
750
0
    }
751
0
    case Type::DCT64X128: {
752
0
      ReinterpretingIDCT<
753
0
          /*DCT_ROWS=*/8 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
754
0
          /*LF_ROWS=*/8, /*LF_COLS=*/16, /*ROWS=*/8, /*COLS=*/16>(
755
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
756
0
      break;
757
0
    }
758
0
    case Type::DCT128X128: {
759
0
      ReinterpretingIDCT<
760
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
761
0
          /*LF_ROWS=*/16, /*LF_COLS=*/16, /*ROWS=*/16, /*COLS=*/16>(
762
0
          block, 16 * kBlockDim, dc, dc_stride, scratch_space);
763
0
      break;
764
0
    }
765
0
    case Type::DCT256X128: {
766
0
      ReinterpretingIDCT<
767
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/16 * kBlockDim,
768
0
          /*LF_ROWS=*/32, /*LF_COLS=*/16, /*ROWS=*/32, /*COLS=*/16>(
769
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
770
0
      break;
771
0
    }
772
0
    case Type::DCT128X256: {
773
0
      ReinterpretingIDCT<
774
0
          /*DCT_ROWS=*/16 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
775
0
          /*LF_ROWS=*/16, /*LF_COLS=*/32, /*ROWS=*/16, /*COLS=*/32>(
776
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
777
0
      break;
778
0
    }
779
0
    case Type::DCT256X256: {
780
0
      ReinterpretingIDCT<
781
0
          /*DCT_ROWS=*/32 * kBlockDim, /*DCT_COLS=*/32 * kBlockDim,
782
0
          /*LF_ROWS=*/32, /*LF_COLS=*/32, /*ROWS=*/32, /*COLS=*/32>(
783
0
          block, 32 * kBlockDim, dc, dc_stride, scratch_space);
784
0
      break;
785
0
    }
786
22.5M
    case Type::DCT:
787
23.5M
    case Type::DCT2X2:
788
23.5M
    case Type::DCT4X4:
789
23.9M
    case Type::DCT4X8:
790
24.3M
    case Type::DCT8X4:
791
24.6M
    case Type::AFV0:
792
25.3M
    case Type::AFV1:
793
25.5M
    case Type::AFV2:
794
25.9M
    case Type::AFV3:
795
26.9M
    case Type::IDENTITY:
796
26.9M
      dc[0] = block[0];
797
26.9M
      break;
798
28.4M
  }
799
28.4M
}
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_ZEN4::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_AVX3_SPR::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
Unexecuted instantiation: enc_chroma_from_luma.cc:jxl::N_SSE2::(anonymous namespace)::DCFromLowestFrequencies(jxl::AcStrategyType, float const*, float*, unsigned long, float*)
800
801
}  // namespace
802
// NOLINTNEXTLINE(google-readability-namespace-comments)
803
}  // namespace HWY_NAMESPACE
804
}  // namespace jxl
805
HWY_AFTER_NAMESPACE();
806
807
#endif  // LIB_JXL_ENC_TRANSFORMS_INL_H_